shinevien/textanalysis (https://gitee.com/shinevien/textanalysis)
polarityAnalysis.r (1.72 KB), committed by shinevien on 2016-01-28 11:02: "text mining"
No open-source license file (LICENSE) is declared for this repository; check the project description and its upstream dependencies before use.
# Load the Chinese word segmentation libraries
library("rJava")
library("Rwordseg")
# Load the word cloud plotting libraries
library("RColorBrewer")
library("wordcloud")
# Library for reading xlsx files
library("xlsx")
source("GetEmotionWords.r")
# File path variables
emotion_words_dic <- "./resources/emotion_words_dic.xlsx";
origin_file <- "./resources/SONY.txt";
stop_words_dic <- "./resources/stop_words_dic.txt";
# Read the emotion word dictionary
mydataframe <- read.xlsx(emotion_words_dic, 1, encoding = "UTF-8", colIndex = c(1, 5, 6, 7),
                         colClasses = c("character", "character", "numeric", "numeric"),
                         stringsAsFactors = FALSE)
print(mydataframe)
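# Assumption (not verified against the xlsx file): the columns read above include a term
# column named "word" and a numeric column named "polarity"; the merge() and the polarity
# loop below rely on those names.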
# Read the raw data (note: read.csv can also read plain .txt files)
myfile<-read.csv(origin_file,header=FALSE);
# Preprocessing: convert the text read in into a character vector that can be segmented; segmentation fails without this step
myfile.res <- myfile[myfile!=" "]
# Load a custom segmentation dictionary here if needed (not loaded in this script; see the sketch below)
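# A minimal sketch of loading extra terms, assuming Rwordseg's insertWords()/installDict()
# interface; the path and dictionary name below are hypothetical, not files in this repository.
# insertWords(c("索尼"))                                            # add individual domain terms
# installDict("./resources/user_dic.scel", dictname = "user_dic")   # install a Sogou .scel dictionary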
# Segment the text and flatten the result into a word vector
myfile.words <- unlist(lapply(X = myfile.res,FUN = segmentCN))
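# A sketch of applying the stop-word list: stop_words_dic is defined above but never used in
# this script; assuming one stop word per line, the stop words could be filtered out here.
# stop_words <- readLines(stop_words_dic, encoding = "UTF-8")
# myfile.words <- myfile.words[!(myfile.words %in% stop_words)]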
# Put the segmented words into a data frame with a single column named 'word'
wordsframe <- data.frame(word=myfile.words);
# Join the emotion dictionary with the segmented words on "word"; the default inner join
# keeps only words that occur in both the dictionary and the text
mergeframe <- merge(x = mydataframe, y = wordsframe, by = "word");
# Compute polarity counts (coding used below: 0 = neutral, 1 = positive, 2 = negative,
# anything else counted as unknown)
positive <- 0;
negative <- 0;
neutral <- 0;
unknow <- 0;
for (i in seq_along(mergeframe$polarity)) {
  weight <- mergeframe$polarity[i];
  if (weight == 0) {
    neutral <- neutral + 1;
  } else if (weight == 1) {
    positive <- positive + 1;
  } else if (weight == 2) {
    negative <- negative + 1;
  } else {
    unknow <- unknow + 1;
  }
}
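# A loop-free alternative sketch of the same counts, using the 0/1/2 coding above:
# polarity_counts <- table(factor(mergeframe$polarity, levels = c(1, 2, 0)), useNA = "always")
# names(polarity_counts) <- c("positive", "negative", "neutral", "unknown")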
counter_list <- c(positive, negative, neutral, unknow);
print(counter_list)
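# A minimal word cloud sketch: RColorBrewer and wordcloud are loaded above but never used in
# this script; term frequencies of the segmented words could be plotted like this.
# freq_table <- sort(table(myfile.words), decreasing = TRUE)
# wordcloud(names(freq_table), as.numeric(freq_table), min.freq = 2,
#           random.order = FALSE, colors = brewer.pal(8, "Dark2"))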