master

分支 (1)

管理

管理

master

public_data
/
yichewang.py

#!/usr/bin/env python
# -*- coding:utf-8 -*-
# @FileName  :yichewang.py
# @Time      :2023/11/23
# @Author    :CL
# @email     :1037654919@qq.com
import requests
from bs4 import BeautifulSoup

from utils import proxies
def _node_text(parent, tag, css_class):
    """Return the stripped text of the first matching descendant, or '' if absent."""
    node = parent.find(tag, class_=css_class)
    return node.get_text().strip() if node is not None else ''


def get_data(url: str = 'https://car.yiche.com/salesrank/?flag=2023') -> list:
    """Fetch one yiche.com sales-rank page and extract its ranking rows.

    Args:
        url: Address of the sales-rank page to download.

    Returns:
        A list of ``[rank, model name, price, sales count]`` rows, every value
        being the stripped text of the matching element ('' when that element
        is missing from a row). Returns an empty list when the page contains
        no ``rk-list-box`` container.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection problems or timeout.
    """
    # Browser-like request headers (the user-agent string is Chrome 114 on Linux).
    headers = {
        "authority": "car.yiche.com",
        "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
        "accept-language": "zh-CN,zh;q=0.9",
        "cache-control": "no-cache",
        "pragma": "no-cache",
        "referer": "https://car.yiche.com/salesrank/",
        "sec-ch-ua": "\"Not.A/Brand\";v=\"8\", \"Chromium\";v=\"114\", \"Google Chrome\";v=\"114\"",
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": "\"Linux\"",
        "sec-fetch-dest": "document",
        "sec-fetch-mode": "navigate",
        "sec-fetch-site": "same-origin",
        "sec-fetch-user": "?1",
        "upgrade-insecure-requests": "1",
        "user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36"
    }
    # NOTE(review): hard-coded cookie values, presumably copied from a browser
    # session; they contain identifiers and timestamps that may go stale --
    # confirm whether the site actually requires them.
    cookies = {
        "CIGUID": "64694b3f-d3fe-4ddf-a0af-444cfc8e7680",
        "CIGDCID": "jNPxKGpah4HrZdb3cnw7E2JD2rxYT8Zj",
        "auto_id": "deb5a7576fe6327a8decd1b1134b0765",
        "UserGuid": "64694b3f-d3fe-4ddf-a0af-444cfc8e7680",
        "selectcity": "330200",
        "selectcityid": "3002",
        "selectcityName": "%E5%AE%81%E6%B3%A2",
        "isWebP": "true",
        "locatecity": "330200",
        "bitauto_ipregion": "183.131.253.190%3A%E6%B5%99%E6%B1%9F%E7%9C%81%E5%AE%81%E6%B3%A2%E5%B8%82%3B3002%2C%E5%AE%81%E6%B3%A2%E5%B8%82%2Cningbo",
        "Hm_lvt_610fee5a506c80c9e1a46aa9a2de2e44": "1699499337,1700707232",
        "report-cookie-id": "174732699_1700707425604",
        "Hm_lpvt_610fee5a506c80c9e1a46aa9a2de2e44": "1700708626"
    }
    response = requests.get(url, headers=headers, cookies=cookies, timeout=10)
    print(response.url, response)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page as data.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'lxml')
    list_box = soup.find('div', class_='rk-list-box')
    if list_box is None:
        # Without this guard a page lacking the list container would raise an
        # opaque AttributeError on None.
        print(f'no rk-list-box found on {response.url}')
        return []

    rows = []
    # Only direct children of the container are treated as ranking entries.
    for item in list_box.find_all('div', recursive=False):
        row = [
            _node_text(item, 'div', 'rk-num db-i v-al-m'),
            _node_text(item, 'div', 'rk-car-name'),
            _node_text(item, 'div', 'rk-car-price'),
            _node_text(item, 'span', 'rk-car-num'),
        ]
        # A child div that yields none of the four fields is not a ranking
        # row, so it is skipped rather than recorded as an empty record.
        if any(row):
            rows.append(row)
    return rows

# Imported before any crawling so a missing pandas installation is reported
# immediately rather than after every page has been downloaded.
from pandas import DataFrame

# Inclusive range of result pages to download.
FIRST_PAGE = 1
LAST_PAGE = 73
# Spreadsheet that receives the collected rows.
OUTPUT_FILE = 'yiche_sales.xlsx'


def main():
    """Download every sales-rank page and save the combined rows to Excel.

    Pages FIRST_PAGE..LAST_PAGE are fetched in order through ``get_data`` and
    their rows concatenated; the result is written to OUTPUT_FILE without an
    index column.
    """
    texts = []
    for page in range(FIRST_PAGE, LAST_PAGE + 1):
        url = f'https://car.yiche.com/salesrank/?flag=2023&page={page}'
        texts += get_data(url=url)

    # Column headers: serial number, car model, price, sales volume.
    df = DataFrame(texts, columns=['序号', '车型', '价格', '销量'])
    df.to_excel(OUTPUT_FILE, index=False)


# Run the crawl only when executed as a script, not when imported.
if __name__ == '__main__':
    main()