1 Star 26 Fork 3

缪洲/lstm-cnn神经网络微博情感分析

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
克隆/下载
bilibili热榜爬虫.py 4.17 KB
一键复制 编辑 原始数据 按行查看 历史
def maketable(x, output_path=r'C:\Users\86139\Desktop\bilibili.xlsx'):
    """Scrape per-video statistics from a Bilibili list API response into a table.

    The response body of ``x`` is scanned with regular expressions (it is not
    parsed as a JSON document); every field yields one list, and the lists
    become the columns of the returned DataFrame. Numeric fields are kept as
    the strings captured from the response text.

    Args:
        x: URL to request, e.g. the popular-list endpoint
            ``https://api.bilibili.com/x/web-interface/popular?ps=20&pn=1``.
        output_path: Destination of the Excel export. Defaults to the path
            the original script wrote to; pass ``None`` to skip the export
            (for machines where that path does not exist).

    Returns:
        pandas.DataFrame with the columns 视频名, 播放量, 弹幕数量, 回复数量,
        收藏数, 投币数, 点赞数, 分享数, 历史最高排名, cid and aid.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        ValueError: raised by pandas when the scraped columns end up with
            different lengths.
    """
    import json
    import re

    import pandas as pd
    import requests

    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.96 Safari/537.36 Edg/88.0.705.50'}
    # A timeout stops a stalled connection from hanging the script forever,
    # and an error status is reported instead of being scraped as if it were data.
    response = requests.get(x, headers=headers, timeout=10)
    response.raise_for_status()
    text = response.text

    def tidy(value):
        # Same clean-up the original applied to every captured value.
        return value.strip().replace(" / ", "")

    def scrape(key):
        # Every occurrence of `"key":<value>,` in document order.
        return [tidy(match) for match in re.findall('"%s":(.*?),' % key, text)]

    def unescape(raw):
        # Turn JSON escapes (\" \n \uXXXX ...) back into the characters they
        # stand for; keep the raw text if it is not a valid JSON string body.
        try:
            return json.loads('"%s"' % raw)
        except ValueError:
            return raw

    # The pattern steps over backslash escapes so a title containing \" is
    # captured whole instead of being cut at the escaped quote.
    title = [
        tidy(unescape(match))
        for match in re.findall(r'"title":"((?:\\.|[^"\\\n])*)"', text)
    ]

    # Only every second "aid" match is kept, starting with the second one.
    # NOTE(review): presumably each video carries its aid twice in the
    # payload - confirm against the API response.
    # The original slice was aid[1:40:2], i.e. hard-wired to 20 rows; the stop
    # now follows the number of scraped rows (identical for 20 rows).
    aid = scrape('aid')[1:2 * len(title):2]

    table = pd.DataFrame({
        '视频名': title,
        '播放量': scrape('view'),
        '弹幕数量': scrape('danmaku'),
        '回复数量': scrape('reply'),
        '收藏数': scrape('favorite'),
        '投币数': scrape('coin'),
        '点赞数': scrape('like'),
        '分享数': scrape('share'),
        '历史最高排名': scrape('his_rank'),
        'cid': scrape('cid'),
        'aid': aid,
    })

    if output_path is not None:
        table.to_excel(output_path, index=False)
    return table
def getdanmu(x, table=None):
    """Download the danmaku (bullet comments) of the video titled ``x``.

    The video's ``cid`` is looked up in the table produced by ``maketable``
    and the matching ``https://comment.bilibili.com/<cid>.xml`` document is
    fetched; the text of every ``<d>`` element is returned.

    Args:
        x: Exact video title, as stored in the ``视频名`` column.
        table: DataFrame returned by ``maketable``. When omitted, the
            module-level variable ``df`` is used, as the original code did.

    Returns:
        pandas.DataFrame with one danmaku text per row.

    Raises:
        NameError: if no table is passed and no module-level ``df`` exists.
        IndexError: if no row of the table has the title ``x``.
        requests.HTTPError: if the server answers with an error status.
    """
    if table is None:
        # Backward compatibility: the original read a global named `df`.
        table = globals().get("df")
        if table is None:
            raise NameError(
                "no video table available: pass table=maketable(url) or "
                "assign the result of maketable() to a module-level `df`"
            )

    # Select by boolean mask so the lookup does not depend on the table
    # having a default 0..n-1 index (the original fed an index label to iloc).
    matches = table.loc[table["视频名"] == x, "cid"]
    if matches.empty:
        raise IndexError("no video titled %r in the table" % (x,))
    cid = int(matches.iloc[0])

    # Imported only after the lookup succeeded, so a wrong title fails
    # before any network-related work starts.
    import numpy as np
    import pandas as pd
    import requests
    from lxml import etree

    url = r'https://comment.bilibili.com/%s.xml' % (cid)
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.96 Safari/537.36 Edg/88.0.705.50'}
    # Bound the wait and surface HTTP errors instead of parsing an error page.
    response = requests.get(url, headers=headers, timeout=10)
    response.raise_for_status()
    response.encoding = 'utf-8'

    document = etree.HTML(response.text.encode('utf-8'))
    # Guard against a parser result of None (nothing parseable in the body).
    texts = document.xpath('//d/text()') if document is not None else []
    return pd.DataFrame(np.array(texts))
def getpinglun(x, table=None):
    """Fetch the first page of comments of the video titled ``x``.

    The video's ``aid`` is looked up in the table produced by ``maketable``
    and page 1 of ``https://api.bilibili.com/x/v2/reply`` is requested for
    it. The response text is scanned with a regular expression for
    ``"message":"..."`` strings; it is not parsed as a JSON document.

    Args:
        x: Exact video title, as stored in the ``视频名`` column.
        table: DataFrame returned by ``maketable``. When omitted, the
            module-level variable ``df`` is used, as the original code did.

    Returns:
        pandas.DataFrame with one captured message per row.

    Raises:
        NameError: if no table is passed and no module-level ``df`` exists.
        IndexError: if no row of the table has the title ``x``.
        requests.HTTPError: if the server answers with an error status.
    """
    if table is None:
        # Backward compatibility: the original read a global named `df`.
        table = globals().get("df")
        if table is None:
            raise NameError(
                "no video table available: pass table=maketable(url) or "
                "assign the result of maketable() to a module-level `df`"
            )

    # Select by boolean mask so the lookup does not depend on the table
    # having a default 0..n-1 index (the original fed an index label to iloc).
    matches = table.loc[table["视频名"] == x, "aid"]
    if matches.empty:
        raise IndexError("no video titled %r in the table" % (x,))
    aid = int(matches.iloc[0])

    # Imported only after the lookup succeeded, so a wrong title fails
    # before any network-related work starts. The unused bs4/lxml imports
    # of the original are gone: they made the call fail when not installed.
    import json
    import re

    import numpy as np
    import pandas as pd
    import requests

    url = 'https://api.bilibili.com/x/v2/reply?pn=1&type=1&oid=%s&sort=1' % (aid)
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.96 Safari/537.36 Edg/88.0.705.50'}
    # Bound the wait and surface HTTP errors instead of scraping an error body.
    response = requests.get(url, headers=headers, timeout=10)
    response.raise_for_status()
    response.encoding = 'utf-8'

    def unescape(raw):
        # Turn JSON escapes (\" \n \uXXXX ...) back into the characters they
        # stand for; keep the raw text if it is not a valid JSON string body.
        try:
            return json.loads('"%s"' % raw)
        except ValueError:
            return raw

    # The pattern steps over backslash escapes so a comment containing \" is
    # captured whole instead of being cut at the escaped quote.
    # NOTE(review): this matches every "message" string in the payload, which
    # may include fields that are not user comments - verify against the API.
    review = [
        unescape(raw)
        for raw in re.findall(r'"message":"((?:\\.|[^"\\\n])*)"', response.text)
    ]
    return pd.DataFrame(np.array(review))
"""
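# First run maketable to build the table. Keep its result in a module-level
# variable named `df`: getdanmu and getpinglun look the video up in `df`.
# Then pass a video title to fetch its danmaku and comments; only the first page of comments is fetched.
x= r'https://api.bilibili.com/x/web-interface/popular?ps=20&pn=1'
df = maketable(x)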
getdanmu("LOL让你全程发病的恐怖套路:哥谭的黑暗又回来了【有点骚东西】")
getpinglun("LOL让你全程发病的恐怖套路:哥谭的黑暗又回来了【有点骚东西】")
"""
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/miao-zhou/lstmcnn.git
git@gitee.com:miao-zhou/lstmcnn.git
miao-zhou
lstmcnn
lstm-cnn神经网络微博情感分析
master

搜索帮助

D67c1975 1850385 1daf7b77 1850385