1 Star 0 Fork 0

毕业设计/online-judge-recommendation

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
Recommandation.py 4.76 KB
一键复制 编辑 原始数据 按行查看 历史
FeanLau 提交于 2020-03-22 11:31 . fix : bug
import pandas as pd
import numpy as np
from numpy import *
import os
from surprise import Dataset
from surprise import NormalPredictor
from surprise import Reader
from surprise.model_selection import cross_validate
from surprise import KNNBaseline
from surprise.model_selection import cross_validate
from surprise.model_selection import GridSearchCV
class Recommandation(object):
    """Item-based top-k problem recommender built on Surprise's KNNBaseline.

    The ratings file is read as comma-separated
    ``user,item,rating,timestamp`` lines. An item-item
    ``pearson_baseline`` similarity matrix is trained on the full data set,
    every item is scored for every user, and the best-scoring items are
    returned for one user.
    """

    def __init__(self, filepath, k, user):
        """Store the recommender configuration.

        Args:
            filepath: Path to the ratings file; ``~`` is expanded on load.
            k: Number of items to recommend per user.
            user: Raw user id whose recommendation row is returned.
        """
        self.filepath = filepath
        self.k = k
        self.id = user

    @staticmethod
    def _rank_unsolved_items(sim_matrix, solved, top_k):
        """Return the best-scoring item indices for every user.

        Args:
            sim_matrix: ``(n_items, n_items)`` item-item similarities.
            solved: ``(n_items, n_users)`` array holding 1 where the user
                has a record for the item and 0 elsewhere.
            top_k: Requested number of items per user; clamped to the
                number of available items.

        Returns:
            Integer array of shape ``(n_users, min(top_k, n_items))``; row
            ``u`` lists item indices ordered from highest to lowest score.
            Items the user already has a record for score ``-inf`` and are
            therefore ranked last, but they can still appear when fewer
            than ``top_k`` other items exist.
        """
        sim = np.asarray(sim_matrix, dtype=float)
        solved = np.asarray(solved, dtype=float)

        # For item a and user u: sum of sim(a, i) over the items i that u
        # has a record for.
        weighted = sim @ solved

        # Sum of each item's similarity to all items. A zero total is
        # replaced by -inf so the division yields -0.0 instead of nan/inf.
        totals = sim.sum(axis=1)
        totals = np.where(totals == 0, -np.inf, totals)

        scores = weighted / totals[:, np.newaxis]
        # Push items the user already has a record for to the bottom.
        scores[solved == 1] = -np.inf

        # Descending order per user column; a stable sort keeps ties in a
        # deterministic (ascending index) order.
        order = np.argsort(-scores, axis=0, kind="stable")
        limit = max(0, min(int(top_k), scores.shape[0]))
        return order[:limit].T

    def getRecomMat(self):
        """Train the model and return the recommendation row for ``self.id``.

        Returns:
            A pandas DataFrame filtered to the rows whose first column
            equals ``self.id``. Column 0 is the raw user id and the
            following columns are the raw ids of the recommended items,
            best first. The frame is empty when ``self.id`` is not present
            in the data set.
        """
        # Build the data set.
        file_path = os.path.expanduser(self.filepath)
        reader = Reader(line_format='user item rating timestamp', sep=',')
        user_data = Dataset.load_from_file(file_path, reader=reader)
        print("构建数据集...")
        trainset = user_data.build_full_trainset()

        # Train the item-based KNN-Baseline model.
        print("开始训练模型...")
        sim_options = {'name': 'pearson_baseline', 'user_based': False}
        algo = KNNBaseline(sim_options=sim_options)
        algo.fit(trainset)

        # The similarity matrix is indexed by the trainset's inner item ids.
        sim = np.array(algo.sim, dtype=float)
        n_items = sim.shape[1]
        n_users = trainset.n_users

        # Binary record matrix: 1 where the user has a rating for the item.
        solved = np.zeros((n_items, n_users))
        for inner_uid, inner_iid, _rating in trainset.all_ratings():
            solved[inner_iid, inner_uid] = 1

        top_items = self._rank_unsolved_items(sim, solved, self.k)

        # One row per user: raw user id followed by the raw ids of the
        # recommended items.
        recom_topk = [
            [trainset.to_raw_uid(inner_uid)]
            + [trainset.to_raw_iid(int(inner_iid)) for inner_iid in item_row]
            for inner_uid, item_row in enumerate(top_items)
        ]

        print('将推荐矩阵写入文件...')
        df_recom_top = pd.DataFrame(recom_topk)
        return df_recom_top[df_recom_top.iloc[:, 0] == self.id]
# Example usage: build the recommendations for user "wgy" from the sample
# ratings file (runs at import time; the returned DataFrame is discarded).
test_path = 'new_suprise_data.txt'
test = Recommandation(filepath=test_path, k=10, user="wgy")
test.getRecomMat()
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/ontology-edu/online-judge-recommendation.git
git@gitee.com:ontology-edu/online-judge-recommendation.git
ontology-edu
online-judge-recommendation
online-judge-recommendation
master

搜索帮助