# [Hosting-page notice, not part of the script] Code pull complete; the page will refresh automatically.
#调入分词的库
library("rJava")
library("Rwordseg")
#调入绘制词云的库
library("RColorBrewer")
library("wordcloud")
#读取xlsx文件的库
library("xlsx")
source("GetEmotionWords.r")
# Input file locations (relative to the working directory)
emotion_words_dic <- "./resources/emotion_words_dic.xlsx"
origin_file <- "./resources/SONY.txt"
stop_words_dic <- "./resources/stop_words_dic.txt"

# Load the emotion dictionary from the first worksheet.
# Only columns 1, 5, 6 and 7 are read, typed as character, character,
# numeric and numeric respectively; strings are kept as plain characters.
mydataframe <- read.xlsx(
  emotion_words_dic,
  sheetIndex = 1,
  colIndex = c(1, 5, 6, 7),
  colClasses = c("character", "character", "numeric", "numeric"),
  encoding = "UTF-8",
  stringsAsFactors = FALSE
)
# Dump the loaded dictionary to the console for inspection
print(mydataframe)
# Read the raw text. Note that read.csv() happily reads a plain .txt file;
# with header = FALSE every line becomes a data row (split on commas).
myfile <- read.csv(origin_file, header = FALSE)
# Pre-processing: flatten the data frame into a vector of text cells, dropping
# cells that consist of a single space. Segmentation does not work on the raw
# data frame, so this step is required.
myfile.res <- myfile[myfile != " "]
# A custom segmentation dictionary could be installed here if needed
# (none is loaded in this script).
# Segment every text cell and flatten the per-cell results into one vector.
# as.character() guarantees a character vector even when nothing was segmented
# (unlist() of an empty list is NULL, which would leave no `word` column below).
myfile.words <- as.character(unlist(lapply(X = myfile.res, FUN = segmentCN)))
# Put the segmented words into a data frame with a single column named `word`.
wordsframe <- data.frame(word = myfile.words, stringsAsFactors = FALSE)
# Join the emotion dictionary with the segmented words on `word`.
# merge() defaults to an inner join, so only words present in both tables are
# kept. The former `add.x = TRUE` argument was not a merge() parameter and was
# silently ignored, so dropping it does not change the result.
mergeframe <- merge(x = mydataframe, y = wordsframe, by = "word")
# Tally the matched words by polarity code.
# Codes handled here: 0 = neutral, 1 = positive, 2 = negative; every other
# value (including NA) is counted as unknown.
# The counts are computed with vectorised comparisons instead of a
# `1:length(...)` loop, so an empty merge result or a missing polarity value
# no longer aborts the script with "missing value where TRUE/FALSE needed".
polarity <- mergeframe$polarity
neutral <- sum(polarity == 0, na.rm = TRUE)
positive <- sum(polarity == 1, na.rm = TRUE)
negative <- sum(polarity == 2, na.rm = TRUE)
# Everything that is not one of the three known codes falls into `unknow`.
unknow <- length(polarity) - (neutral + positive + negative)
# Result order: positive, negative, neutral, unknown.
counter_list <- c(positive, negative, neutral, unknow)
print(counter_list)
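# [Hosting-page notice, not part of the script] This section may contain content unsuitable for display, so the page does not show it. You can review and modify it using the relevant editing features.
# [Hosting-page notice, not part of the script] If you confirm the content involves no inappropriate language, pure advertising, violence, vulgar or pornographic material, infringement, piracy, false or worthless content, or anything violating national laws and regulations, you may click submit to appeal and it will be handled as soon as possible.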