# (Gitee page-scrape artifact, not code): 代码拉取完成,页面将自动刷新
def maketable(x, output_path=r'C:\Users\86139\Desktop\bilibili.xlsx'):
    """Fetch the bilibili popular-videos API and build a per-video stats table.

    Parameters
    ----------
    x : str
        URL of the bilibili popular-list API endpoint (returns JSON).
    output_path : str
        Where the table is saved as an Excel file; defaults to the
        original hard-coded desktop path for backward compatibility.

    Returns
    -------
    pandas.DataFrame
        One row per video: title, view/danmaku/reply/favourite/coin/
        like/share counts, historical best rank, cid and aid.

    Side effects: writes ``output_path`` and sets the module-level
    global ``df`` so that ``getdanmu``/``getpinglun`` can look videos up.
    """
    import requests
    import re
    import pandas as pd
    global df  # getdanmu/getpinglun read this module-level table

    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.96 Safari/537.36 Edg/88.0.705.50'}
    response = requests.get(x, headers=headers)

    def _extract(pattern):
        # One regex pass over the raw JSON text, normalised exactly the
        # way the original per-field loops did (strip + drop " / ").
        return [p.strip().replace(" / ", "") for p in re.findall(pattern, response.text)]

    title = _extract('"title":"(.*?)"')
    view = _extract('"view":(.*?),')
    danmu = _extract('"danmaku":(.*?),')
    reply = _extract('"reply":(.*?),')
    favourite = _extract('"favorite":(.*?),')
    coin = _extract('"coin":(.*?),')
    share = _extract('"share":(.*?),')
    now_rank = _extract('"now_rank":(.*?),')  # collected but not exported (kept for parity with original)
    his_rank = _extract('"his_rank":(.*?),')
    like = _extract('"like":(.*?),')
    cid = _extract('"cid":(.*?),')
    aid = _extract('"aid":(.*?),')
    # The response text matches "aid" twice per video; keep every other
    # match (capped at 20 entries) so aid lines up with the other columns.
    aid = aid[1:40:2]

    bilibili = {
        '视频名': title,
        '播放量': view,
        '弹幕数量': danmu,
        '回复数量': reply,
        '收藏数': favourite,
        '投币数': coin,
        '点赞数': like,
        '分享数': share,
        '历史最高排名': his_rank,
        'cid': cid,
        'aid': aid
    }
    df = pd.DataFrame(bilibili)
    df.to_excel(output_path, index=False)
    return df
def getdanmu(x):
    """Return all danmaku (bullet comments) for the video titled ``x``.

    Looks the title up in the module-level ``df`` built by ``maketable``
    (raises NameError if ``maketable`` has not been run first), fetches
    the comment XML from comment.bilibili.com, and returns a one-column
    DataFrame with one danmaku message per row.
    """
    import requests
    from lxml import etree
    import pandas as pd

    # Find the row for this exact title and take its cid (comment id).
    index = df[df["视频名"] == x].index.tolist()[0]
    cid = int(df.iloc[index, :]['cid'])
    url = r'https://comment.bilibili.com/%s.xml' % (cid)
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.96 Safari/537.36 Edg/88.0.705.50'}
    response = requests.get(url, headers=headers)
    response.encoding = 'utf-8'
    # Pass bytes to etree.HTML: the XML declares its own encoding, and
    # lxml rejects str input that carries an encoding declaration.
    html = etree.HTML(response.text.encode('utf-8'))
    # Every <d> element's text is one danmaku message.
    return pd.DataFrame(html.xpath('//d/text()'))
def getpinglun(x):
    """Return the first page of user comments for the video titled ``x``.

    Looks the title up in the module-level ``df`` built by ``maketable``
    (raises NameError if ``maketable`` has not been run first), queries
    the bilibili reply API (page 1 only), and returns a one-column
    DataFrame with one comment message per row.
    """
    import requests
    import re
    import pandas as pd

    # Find the row for this exact title and take its aid (video id).
    index = df[df["视频名"] == x].index.tolist()[0]
    aid = int(df.iloc[index, :]['aid'])
    url = 'https://api.bilibili.com/x/v2/reply?pn=1&type=1&oid=%s&sort=1' % (aid)
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.96 Safari/537.36 Edg/88.0.705.50'}
    response = requests.get(url, headers=headers)
    response.encoding = 'utf-8'
    # Each "message" field in the reply JSON is one comment body.
    review = re.findall('"message":"(.*?)"', response.text)
    return pd.DataFrame(review)
"""
#首先运行第一个函数获得表格
#再输入视频的名字获得弹幕和评论,评论只获得了第一页的评论
x= r'https://api.bilibili.com/x/web-interface/popular?ps=20&pn=1'
maketable(x)
getdanmu("LOL让你全程发病的恐怖套路:哥谭的黑暗又回来了【有点骚东西】")
getpinglun("LOL让你全程发病的恐怖套路:哥谭的黑暗又回来了【有点骚东西】")
"""
# (Gitee page-scrape artifact, not code): 此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
# (Gitee page-scrape artifact, not code): 如您确认内容无涉及不当内容或违法国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。