1 Star 3 Fork 0

ydsungan/图书推荐系统

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
svd4.py 4.73 KB
一键复制 编辑 原始数据 按行查看 历史
ydsungan 提交于 2021-11-23 15:01 . 1123 backup
import sparsesvd
import scipy
import csv
from itertools import islice
import numpy as np
import math
def recommendation(train_user_id_list, test_user_id_list, row, col, data, m, n, k, goods_num):
    """Recommend the top ``goods_num`` items for every test user.

    The interaction matrix is built from ``(row, col, data)``, factorised
    with ``sparsesvd``, the resulting user/item factors are refined with a
    few epochs of regularised stochastic gradient descent, and finally every
    item is scored for each test user.

    Args:
        train_user_id_list: User ids; position ``i`` is looked up to find the
            factor row of a user (presumably aligned with the values in
            ``row`` -- verify against the caller).
        test_user_id_list: User ids to produce recommendations for.
        row: Row (user) indices of the non-zero entries.
        col: Column (item) indices of the non-zero entries.
        data: Values of the non-zero entries.
        m: Number of rows (users) of the interaction matrix.
        n: Number of columns (items) of the interaction matrix.
        k: Number of SVD factors requested from ``sparsesvd``.
        goods_num: Number of item indices to return per user.

    Returns:
        dict mapping each test user id to a list of item indices, best first.

    Raises:
        ValueError: If a test user id does not occur in
            ``train_user_id_list``.
    """
    # Imported locally so the ``sparse`` submodule is loaded even though the
    # file only does a bare ``import scipy``.
    from scipy.sparse import coo_matrix

    learning_rate = 0.05
    reg = 0.01
    epochs = 8

    coo_mat = coo_matrix((data, (row, col)), shape=(m, n), dtype=np.float64)
    ut, _singular_values, vt = sparsesvd.sparsesvd(coo_mat.tocsc(), k)

    # One factor vector per row: users in ``p`` (m x f), items in ``q``
    # (n x f).  The factor count is taken from the arrays themselves instead
    # of being forced to ``k`` by a reshape.
    initial_p = np.array(ut.T, dtype=np.float64)
    initial_q = np.array(vt.T, dtype=np.float64)
    p = initial_p.copy()
    q = initial_q.copy()
    print("数据准备完成..")

    # NOTE(review): as in the original code, the regression target is the
    # product of the *initial* factors (singular values are not applied).
    # It used to be stored as a dense m x n matrix filled by a Python double
    # loop; here each user's target row is computed on demand instead.
    for epoch in range(epochs):
        for i in range(m):
            pu = p[i].copy()
            targets = initial_q @ initial_p[i]
            for j in range(n):
                qi = q[j]
                # All vectors are 1-D, so the updates keep their shape.  The
                # previous row-vs-column formulation broadcast both factors
                # into k x k matrices.
                qi = qi + learning_rate * ((targets[j] - pu @ qi) * pu - reg * qi)
                pu = pu + learning_rate * ((targets[j] - pu @ qi) * qi - reg * pu)
                q[j] = qi
            p[i] = pu
        learning_rate *= 0.9
        print("梯度下降进度:{:.2f}%".format((epoch + 1) / epochs * 100))

    # First occurrence wins, matching ``list.index``, but with O(1) lookups.
    row_of_user = {}
    for position, train_user_id in enumerate(train_user_id_list):
        row_of_user.setdefault(train_user_id, position)

    total_users = len(test_user_id_list)
    user_recommendation_list = {}
    for count, user_id in enumerate(test_user_id_list):
        try:
            user_row = row_of_user[user_id]
        except KeyError:
            raise ValueError(
                "user id {!r} is not in train_user_id_list".format(user_id)
            ) from None
        # Predicted score of every item is the dot product of the factors.
        scores = q @ p[user_row]
        # Stable sort on the negated scores: descending by score, ties in
        # ascending item index.
        ranked = np.argsort(-scores, kind="stable")
        user_recommendation_list[user_id] = ranked[:goods_num].tolist()
        if count % 100 == 0:
            print("当前推荐进度: {:.2f}%".format(count / total_users * 100))
    return user_recommendation_list
def load_test_data(data_path):
    """Read the user ids from the first column of a CSV file.

    The first row is treated as a header and skipped; completely empty rows
    are ignored.

    Args:
        data_path: Path of the UTF-8 encoded CSV file.

    Returns:
        list of int user ids in file order.
    """
    # ``with`` guarantees the handle is closed even if parsing fails.
    with open(data_path, "r", encoding="utf-8") as handle:
        rows = islice(csv.reader(handle), 1, None)
        # csv.reader yields [] for blank lines; skip them instead of failing
        # on ``line[0]``.
        return [int(line[0]) for line in rows if line]
def pretreat_data(data_path, n, total=5869632):
    """Group the item ids of a (user_id, item_id) CSV by consecutive user id.

    The first row is treated as a header and skipped; completely empty rows
    are ignored.  A new group is started every time the user id in the first
    column differs from the previous row's.

    The result always starts with user id ``0``: if the first data row
    belongs to another user, user ``0`` gets an empty item list.  This
    mirrors the original behaviour so row indices stay unchanged.

    Args:
        data_path: Path of the UTF-8 encoded CSV file.
        n: Unused; kept for interface compatibility.
        total: Row count used as the denominator of the progress
            percentage that is printed every 10000 rows.

    Returns:
        ``(user_id_list, item_list)`` where ``item_list[i]`` holds the item
        ids read for ``user_id_list[i]``.
    """
    user_id_list = [0]
    item_list = []
    current_uid = 0
    current_items = []
    count = 0
    # ``with`` guarantees the handle is closed even if parsing fails.
    with open(data_path, "r", encoding="utf-8") as handle:
        for line in islice(csv.reader(handle), 1, None):
            if not line:
                # csv.reader yields [] for blank lines.
                continue
            uid = int(line[0])
            if uid != current_uid:
                # User changed: close the previous group and open a new one.
                user_id_list.append(uid)
                item_list.append(current_items)
                current_uid = uid
                current_items = []
            current_items.append(int(line[1]))
            count += 1
            if count % 10000 == 0:
                print("预处理进度: {:.2f}%".format(count / total * 100))
    # Flush the group that was still open at end of file.
    item_list.append(current_items)
    return user_id_list, item_list
def load_train_data(item_list):
    """Flatten per-user item lists into COO-style triplets.

    Args:
        item_list: Sequence whose ``i``-th element is an iterable of the item
            ids belonging to row ``i``.

    Returns:
        ``(row, col, data)`` lists of equal length: ``row`` holds the index
        of the originating element of ``item_list``, ``col`` the item id and
        ``data`` the constant ``1`` for every entry.
    """
    row = []
    col = []
    for user_index, items in enumerate(item_list):
        for item_id in items:
            row.append(user_index)
            col.append(item_id)
    # Every interaction carries the same implicit weight of 1.
    data = [1] * len(row)
    return row, col, data
def write_res_to_csv(user_recommendation_res):
    """Write the recommendations to ``submission_svd_gd4.csv``.

    The file is created in the current working directory with a
    ``user_id,item_id`` header followed by one row per recommended item.

    Args:
        user_recommendation_res: Mapping of user id to an iterable of
            recommended item ids.
    """
    # ``with`` closes the file even if a row fails to serialise.
    with open("submission_svd_gd4.csv", "w", encoding="utf-8", newline="") as handle:
        csv_writer = csv.writer(handle)
        csv_writer.writerow(["user_id", "item_id"])
        for user_id, recommendation_list in user_recommendation_res.items():
            csv_writer.writerows(
                [str(user_id), str(item_id)] for item_id in recommendation_list
            )
def main():
    """Run the pipeline: preprocess, load, recommend, write the CSV."""
    train_data_path = "./dataset/book_train_dataset.csv"
    test_data_path = "./dataset/book_test_dataset.csv"

    n = 10000  # number of items
    k = 10  # SVD decomposition coefficient
    goods_num = 10  # number of items recommended per user

    train_user_id_list, item_list = pretreat_data(train_data_path, n)
    print("训练预处理完成...")

    row, col, data = load_train_data(item_list)
    print("训练数据加载完成...")

    # m is the number of users.
    m = len(train_user_id_list)

    test_user_id_list = load_test_data(test_data_path)
    print("测试数据加载完成...")

    user_recommendation_list = recommendation(
        train_user_id_list,
        test_user_id_list,
        row,
        col,
        data,
        m,
        n,
        k,
        goods_num,
    )
    print("用户物品推荐完成, 正在写入文件...")

    write_res_to_csv(user_recommendation_list)
    print("测试结果写入文件完成!")


if __name__ == "__main__":
    main()
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/ydsungan/book-recommendation-system.git
git@gitee.com:ydsungan/book-recommendation-system.git
ydsungan
book-recommendation-system
图书推荐系统
master

搜索帮助

0d507c66 1850385 C8b1a773 1850385