代码拉取完成,页面将自动刷新
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# @FileName :yichewang.py
# @Time :2023/11/23
# @Author :CL
# @email :1037654919@qq.com
import requests
from bs4 import BeautifulSoup
from utils import proxies
def get_data(url ='https://car.yiche.com/salesrank/?flag=2023' ):
    """Download one yiche.com sales-rank page and extract its ranking rows.

    Args:
        url: Address of the sales-rank page to fetch.

    Returns:
        A list with one ``[rank, name, price, sales]`` entry per ranking row,
        each value being the stripped text of the matching element. The list
        is empty when the page contains no ``rk-list-box`` container.

    Raises:
        requests.HTTPError: The server answered with a 4xx/5xx status.
        requests.RequestException: The request failed or exceeded the
            10 second timeout.
        AttributeError: A row inside the container lacks one of the four
            expected elements (kept strict so malformed rows are not
            silently dropped).
    """
    # Browser-like request headers; presumably copied from a real Chrome
    # session so the request looks like ordinary browsing.
    headers = {
        "authority": "car.yiche.com",
        "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
        "accept-language": "zh-CN,zh;q=0.9",
        "cache-control": "no-cache",
        "pragma": "no-cache",
        "referer": "https://car.yiche.com/salesrank/",
        "sec-ch-ua": "\"Not.A/Brand\";v=\"8\", \"Chromium\";v=\"114\", \"Google Chrome\";v=\"114\"",
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": "\"Linux\"",
        "sec-fetch-dest": "document",
        "sec-fetch-mode": "navigate",
        "sec-fetch-site": "same-origin",
        "sec-fetch-user": "?1",
        "upgrade-insecure-requests": "1",
        "user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36"
    }
    # Session cookies sent with every request.
    # NOTE(review): these look like values captured from one browser session
    # and may expire - confirm they are still required/valid.
    cookies = {
        "CIGUID": "64694b3f-d3fe-4ddf-a0af-444cfc8e7680",
        "CIGDCID": "jNPxKGpah4HrZdb3cnw7E2JD2rxYT8Zj",
        "auto_id": "deb5a7576fe6327a8decd1b1134b0765",
        "UserGuid": "64694b3f-d3fe-4ddf-a0af-444cfc8e7680",
        "selectcity": "330200",
        "selectcityid": "3002",
        "selectcityName": "%E5%AE%81%E6%B3%A2",
        "isWebP": "true",
        "locatecity": "330200",
        "bitauto_ipregion": "183.131.253.190%3A%E6%B5%99%E6%B1%9F%E7%9C%81%E5%AE%81%E6%B3%A2%E5%B8%82%3B3002%2C%E5%AE%81%E6%B3%A2%E5%B8%82%2Cningbo",
        "Hm_lvt_610fee5a506c80c9e1a46aa9a2de2e44": "1699499337,1700707232",
        "report-cookie-id": "174732699_1700707425604",
        "Hm_lpvt_610fee5a506c80c9e1a46aa9a2de2e44": "1700708626"
    }
    response = requests.get(url, headers=headers, cookies=cookies,timeout=10)
    print(response.url,response)
    # Fail loudly on HTTP errors instead of trying to parse an error page.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'lxml')
    container = soup.find('div',class_='rk-list-box')
    if container is None:
        # No ranking list on this page (e.g. a page number past the end):
        # report "no rows" rather than crashing on None.
        return []

    rows = []
    # Only direct child <div>s of the container are treated as ranking rows.
    for entry in container.find_all('div',recursive=False):
        rank = entry.find('div',class_='rk-num db-i v-al-m').get_text().strip()
        name = entry.find('div',class_='rk-car-name').get_text().strip()
        price = entry.find('div',class_='rk-car-price').get_text().strip()
        sales = entry.find('span',class_='rk-car-num').get_text().strip()
        rows.append([rank,name,price,sales])
    return rows
# Driver: fetch every sales-rank page for 2023 and export all rows to Excel.
# NOTE: this still runs at module import time, as the original script did.
BASE_URL = 'https://car.yiche.com/salesrank/?flag=2023'
# Highest page number requested.
# NOTE(review): hard-coded by the original author - confirm it still matches the site.
LAST_PAGE = 73

texts = []
for page in range(1, LAST_PAGE + 1):
    url = f'{BASE_URL}&page={page}'
    texts.extend(get_data(url=url))

# pandas is imported here, where the original script imported it.
from pandas import DataFrame

# Column headers are runtime strings: serial number, car model, price, sales volume.
df = DataFrame(texts, columns=['序号', '车型', '价格', '销量'])
df.to_excel('yiche_sales.xlsx', index=False)
此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
如您确认内容无涉及 不当用语 / 纯广告导流 / 暴力 / 低俗色情 / 侵权 / 盗版 / 虚假 / 无价值内容或违法国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。