1 Star 3 Fork 0

ydsungan/图书推荐系统

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
svd2.py 3.41 KB
一键复制 编辑 原始数据 按行查看 历史
ydsungan 提交于 2021-11-23 15:01 . 1123 backup
import sparsesvd
import scipy
import csv
from itertools import islice
import numpy as np
import math
def gd(qi, pu, score, lr=0.02, l=0.005, iter=40):
    """Refine an item/user latent-vector pair by regularised gradient descent.

    Each step moves both vectors down the gradient of
    ``(score - qi.T @ pu) ** 2 + l * (||qi||^2 + ||pu||^2)``.

    Args:
        qi: Item latent vector (called with shape ``(k, 1)`` in this file).
        pu: User latent vector, same shape as ``qi``.
        score: Target value for the prediction ``qi.T @ pu``.
        lr: Learning rate.
        l: Regularisation (penalty) coefficient.
        iter: Number of update steps to run; values below 1 run none.

    Returns:
        Tuple ``(qi, pu)`` holding the updated vectors. The arguments are
        not modified in place.
    """
    for _ in range(int(iter)):
        # Residual of the current prediction against the target.
        err = score - np.matmul(qi.T, pu)
        # Descent step: add lr * (err * other - l * self). Both vectors are
        # updated from the same (old) values, hence the tuple assignment.
        qi, pu = (
            qi + lr * (err * pu - l * qi),
            pu + lr * (err * qi - l * pu),
        )
    return qi, pu
def recommendation(test_user_id_list, row, col, data, m, n, k, goods_num):
    """Build a top-N item list for every test user from a truncated SVD.

    A sparse ``m x n`` user/item matrix is assembled from the ``(row, col,
    data)`` triplets and factorised with ``sparsesvd``. For each test user,
    every item is scored from the user and item factor vectors after they
    have been passed through ``gd``, and the ``goods_num`` highest-scoring
    item ids are kept.

    Args:
        test_user_id_list: User ids (row indices of the matrix) to score.
        row: Row (user) index of each interaction.
        col: Column (item) index of each interaction.
        data: Value of each interaction.
        m: Number of rows (users) of the matrix.
        n: Number of columns (items) of the matrix.
        k: Number of SVD factors requested.
        goods_num: Number of items to keep per user.

    Returns:
        Dict mapping each user id to its list of recommended item ids,
        best first.

    NOTE(review): the singular values returned by the decomposition are not
    used in the score, and a user id >= ``m`` raises IndexError - confirm
    both are intended.
    """
    # Imported explicitly so the ``sparse`` submodule is guaranteed to be
    # loaded; a bare ``import scipy`` does not promise that.
    import scipy.sparse

    coo_mat = scipy.sparse.coo_matrix((data, (row, col)), shape=(m, n))
    ut, s, vt = sparsesvd.sparsesvd(coo_mat.tocsc(), k)
    # u has one row per user, vt has one column per item.
    u = ut.T
    # Use the factor count actually returned instead of assuming it equals
    # k, so the reshapes below stay valid if fewer factors come back.
    factors = vt.shape[0]

    user_recommendation_list = {}
    total_users = len(test_user_id_list)
    for count, user_id in enumerate(test_user_id_list):
        pu = np.reshape(u[user_id], [factors, 1])
        recommendation_score = {}
        # Score every item for this user.
        for item_id in range(n):
            qi = np.reshape(vt[:, item_id], [factors, 1])
            score = np.matmul(qi.T, pu)[0][0]
            # Keep the refined vectors local to this item: rebinding ``pu``
            # here would let the user vector drift from item to item and
            # make the scores depend on the item order.
            new_qi, new_pu = gd(qi, pu, score)
            recommendation_score[item_id] = np.matmul(new_qi.T, new_pu)[0][0]
        ranked = sorted(
            recommendation_score.items(), key=lambda x: x[1], reverse=True
        )
        user_recommendation_list[user_id] = [
            item_id for item_id, _ in ranked[:goods_num]
        ]
        # Progress report every 20 users.
        if count % 20 == 0:
            print("当前训练进度: {}".format(count / total_users * 100))
    return user_recommendation_list
def load_test_data(data_path):
    """Read the user ids to score from a CSV file.

    The first row is treated as a header and skipped. The first column of
    every following non-empty row is parsed as an integer user id.

    Args:
        data_path: Path of the UTF-8 encoded CSV file.

    Returns:
        List of user ids in file order.

    Raises:
        ValueError: If a first-column value is not an integer.
    """
    # ``with`` guarantees the handle is closed; ``newline=""`` is what the
    # csv module expects for files it reads.
    with open(data_path, "r", encoding="utf-8", newline="") as handle:
        reader = csv.reader(handle)
        # Blank lines come back as empty rows and are skipped.
        return [int(line[0]) for line in islice(reader, 1, None) if line]
def load_train_data(data_path):
    """Read the training interactions from a CSV file.

    The first row is treated as a header and skipped. For every following
    non-empty row the first column is parsed as the row index, the second
    as the column index, and the interaction value is fixed at 1.

    Args:
        data_path: Path of the UTF-8 encoded CSV file.

    Returns:
        Tuple ``(row, col, data)`` of three equally long lists.

    Raises:
        ValueError: If an index value is not an integer.
        IndexError: If a non-empty row has fewer than two columns.
    """
    row = []
    col = []
    data = []
    # ``with`` guarantees the handle is closed; ``newline=""`` is what the
    # csv module expects for files it reads.
    with open(data_path, "r", encoding="utf-8", newline="") as handle:
        for line in islice(csv.reader(handle), 1, None):
            if not line:
                # Blank lines come back as empty rows; skip them.
                continue
            row.append(int(line[0]))
            col.append(int(line[1]))
            data.append(1)
    return row, col, data
def write_res_to_csv(user_recommendation_res):
    """Write the recommendations to ``submission_svd_gd.csv``.

    The file is created in the current working directory (overwriting any
    existing one) with a ``user_id,item_id`` header followed by one row per
    recommended item.

    Args:
        user_recommendation_res: Dict mapping a user id to the list of item
            ids recommended to that user.
    """
    # ``with`` closes the file even when a write raises.
    with open("submission_svd_gd.csv", "w", encoding="utf-8", newline="") as file:
        csv_writer = csv.writer(file)
        csv_writer.writerow(["user_id", "item_id"])
        for user_id, recommendation_list in user_recommendation_res.items():
            for item_id in recommendation_list:
                csv_writer.writerow([str(user_id), str(item_id)])
if __name__ == "__main__":
    # Input files: training interactions and the users to produce
    # recommendations for.
    train_csv = "./dataset/book_train_dataset.csv"
    test_csv = "./dataset/book_test_dataset.csv"

    item_count = 10000  # number of items (columns of the matrix)
    svd_factors = 10  # number of factors requested from the SVD
    top_n = 10  # number of items recommended to each user

    user_idx, item_idx, values = load_train_data(train_csv)
    print("训练数据加载完成...")

    # Number of users (rows): one more than the largest user index seen.
    user_count = max(user_idx) + 1

    test_users = load_test_data(test_csv)
    print("测试数据加载完成...")

    results = recommendation(
        test_users,
        user_idx,
        item_idx,
        values,
        m=user_count,
        n=item_count,
        k=svd_factors,
        goods_num=top_n,
    )
    print("用户物品推荐完成, 正在写入文件...")

    write_res_to_csv(results)
    print("测试结果写入文件完成!")
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/ydsungan/book-recommendation-system.git
git@gitee.com:ydsungan/book-recommendation-system.git
ydsungan
book-recommendation-system
图书推荐系统
master

搜索帮助