# encoding:UTF-8
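"""Weibo comment analysis (summary of the pipeline in this script).

Reads comments from the MySQL table nlp.love_guan and then:
  * builds a word cloud of the comment texts (jieba + wordcloud),
  * plots a SnowNLP sentiment histogram,
  * counts emoji tokens written in square brackets,
  * prints the 20 most frequent commenters.

Dependencies: pymysql, jieba, numpy, matplotlib, snownlp, wordcloud.
"""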
import re

import jieba
import matplotlib.pyplot as plt
import numpy as np
import pymysql
from collections import Counter
from snownlp import SnowNLP
from wordcloud import WordCloud, ImageColorGenerator
def readmysql():  # read the comments from MySQL
    commentlist = []
    textlist = []
    userlist = []
    # Fill in your own server address and credentials here.
    conn = pymysql.connect(host='SERVER_IP', user='USERNAME', password='PASSWORD', charset="utf8")
    with conn:
        cur = conn.cursor()
        # Columns after the auto-increment id: comment_id, user_name, created_at, text, likenum, source
        cur.execute("SELECT * FROM nlp.love_guan WHERE id < %s", (10000,))
        rows = cur.fetchall()
        for row in rows:
            row = list(row)
            del row[0]  # drop the auto-increment id
            if row not in commentlist:  # skip duplicate comments
                commentlist.append(row[:6])
                comment_id, user_name, created_at, text, likenum, source = row[:6]
                userlist.append(user_name)
                if text:
                    textlist.append(text)
                # print("%s %s %s %s %s %s" % (comment_id, user_name, created_at, text, likenum, source))
    return commentlist, userlist, textlist
def wordtocloud(textlist):  # build a word cloud from the comment texts
    fulltext = ''
    back_coloring = plt.imread("bg.jpg")  # mask / color reference image (scipy.misc.imread has been removed from SciPy)
    cloud = WordCloud(font_path='font.ttf',      # a Chinese font is required, otherwise characters show up as boxes
                      background_color="white",  # background color
                      max_words=2000,            # maximum number of words in the cloud
                      mask=back_coloring,        # shape mask taken from the background image
                      max_font_size=100,         # largest font size
                      random_state=42,
                      width=1000, height=860, margin=2)  # default canvas size; with a mask the saved image follows the mask size; margin is the spacing around words
    for li in textlist:
        fulltext += ' '.join(jieba.cut(li, cut_all=False))
    wc = cloud.generate(fulltext)
    image_colors = ImageColorGenerator(back_coloring)  # recolor the words from the mask image
    plt.figure("wordc")
    plt.imshow(wc.recolor(color_func=image_colors))
    wc.to_file('微博评论词云.png')  # save the word cloud image
def snowanalysis(textlist):  # SnowNLP sentiment histogram (0 = negative, 1 = positive)
    sentimentslist = []
    for li in textlist:
        s = SnowNLP(li)
        # print(li)
        # print(s.sentiments)
        sentimentslist.append(s.sentiments)
    plt.figure("sentiment")
    plt.hist(sentimentslist, bins=np.arange(0, 1, 0.02))
    plt.show()
def emojilist(textlist):  # count Weibo emoji tokens, which appear in the text as [名称]
    emojis_found = []
    for li in textlist:
        emojis_found.extend(re.findall(r'\[.*?\]', li, re.S))
    emojidict = Counter(emojis_found)
    print(emojidict)
def follows(userlist):  # print the 20 most frequent commenters
    userdict = Counter(userlist)
    print(userdict.most_common(20))
if __name__ == '__main__':
    # run the full pipeline
    commentlist, userlist, textlist = readmysql()
    wordtocloud(textlist)
    snowanalysis(textlist)
    emojilist(textlist)
    follows(userlist)