代码拉取完成,页面将自动刷新
import sparsesvd
import scipy
import csv
from itertools import islice
import numpy as np
import math
def gd(qi, pu, score, lr=0.02, l=0.005, iter=40):
    """Refine an item vector and a user vector by regularised gradient descent.

    Each iteration takes one descent step on the objective
    ``(score - qi^T pu)^2 + l * (|qi|^2 + |pu|^2)``: first ``qi`` is
    updated, then ``pu`` is updated using the freshly updated ``qi``.

    Args:
        qi: Item latent vector. The caller in this file passes a NumPy
            array of shape (k, 1).
        pu: User latent vector, passed with the same (k, 1) shape.
        score: Target value that ``qi^T pu`` should approximate.
        lr: Learning rate (step size).
        l: Regularisation (penalty) coefficient.
        iter: Number of descent iterations; values <= 0 perform none.

    Returns:
        Tuple ``(qi, pu)`` with the refined vectors. The input arrays are
        not modified in place; new arrays are created on every step.
    """
    for _ in range(iter):
        # Descent step: move *against* the gradient, i.e. add
        # lr * (error * other_vector - l * own_vector).  The (1, 1) result
        # of matmul broadcasts against the (k, 1) vector.
        qi = qi + lr * ((score - np.matmul(qi.T, pu)) * pu - l * qi)
        # The error is recomputed here, so this step sees the updated qi.
        pu = pu + lr * ((score - np.matmul(qi.T, pu)) * qi - l * pu)
    return qi, pu
def recommendation(test_user_id_list, row, col, data, m, n, k, goods_num):
    """Recommend the top ``goods_num`` items for every test user.

    Builds an ``m x n`` sparse matrix from the ``(row, col, data)``
    triplets, factorises it with ``sparsesvd`` into ``k`` factors, and for
    every test user scores all ``n`` items. Each (user, item) pair is
    passed through ``gd`` before the final score is taken.

    Args:
        test_user_id_list: User ids to produce recommendations for; each id
            is used as a row index into the user-factor matrix.
        row: Row (user) indices of the non-zero entries.
        col: Column (item) indices of the non-zero entries.
        data: Values of the non-zero entries.
        m: Number of rows (users) of the matrix.
        n: Number of columns (items) of the matrix.
        k: Number of SVD factors requested.
        goods_num: Number of items to keep per user.

    Returns:
        Dict mapping each user id to a list of item ids (column indices),
        ordered from highest to lowest score.
    """
    # Import the submodule explicitly: a bare ``import scipy`` does not
    # guarantee that ``scipy.sparse`` is loaded.
    from scipy.sparse import coo_matrix

    csc_mat = coo_matrix((data, (row, col)), shape=(m, n)).tocsc()
    ut, s, vt = sparsesvd.sparsesvd(csc_mat, k)
    # u has shape m x k, vt has shape k x n.
    # NOTE(review): the singular values ``s`` are not used when scoring --
    # confirm this is intended.
    u = ut.T

    user_recommendation_list = {}
    for count, user_id in enumerate(test_user_id_list):
        user_vec = np.reshape(u[user_id], [k, 1])
        recommendation_score = {}
        # Score every item; ``i`` is the item id (column index).
        for i in range(n):
            item_vec = np.reshape(vt[:, i], [k, 1])
            score = np.matmul(item_vec.T, user_vec)[0][0]
            # Refine this pair with gradient descent.  Every item starts
            # from the same ``user_vec``: feeding the refined user vector
            # into the next item would make each score depend on the
            # iteration order and the scores would not be comparable.
            qi, pu = gd(item_vec, user_vec, score)
            recommendation_score[i] = np.matmul(qi.T, pu)[0][0]

        ranked = sorted(recommendation_score.items(), key=lambda x: x[1], reverse=True)
        user_recommendation_list[user_id] = [
            item_id for item_id, _ in ranked[:goods_num]
        ]

        # Report progress (percentage of users processed) every 20 users.
        if count % 20 == 0:
            print("当前训练进度: {}".format(count / len(test_user_id_list) * 100))
    return user_recommendation_list
def load_test_data(data_path):
    """Read the user ids listed in a test CSV file.

    The first row is treated as a header and skipped; the first column of
    every remaining row is parsed as an integer user id. Completely empty
    rows (e.g. a trailing blank line) are ignored.

    Args:
        data_path: Path to a UTF-8 encoded CSV file.

    Returns:
        List of integer user ids in file order.

    Raises:
        ValueError: If a first-column value is not a valid integer.
    """
    # ``with`` guarantees the handle is closed; newline="" is the mode the
    # csv module expects for files it reads.
    with open(data_path, "r", encoding="utf-8", newline="") as handle:
        return [int(line[0]) for line in islice(csv.reader(handle), 1, None) if line]
def load_train_data(data_path):
    """Read (user, item) interaction pairs from a training CSV file.

    The first row is treated as a header and skipped. For every remaining
    row the first column is parsed as the row index and the second column
    as the column index; each pair gets the value 1. Completely empty rows
    (e.g. a trailing blank line) are ignored.

    Args:
        data_path: Path to a UTF-8 encoded CSV file.

    Returns:
        Tuple ``(row, col, data)`` of three equally long lists: integer row
        indices, integer column indices, and a list of ones.

    Raises:
        ValueError: If an index column is not a valid integer.
        IndexError: If a non-empty row has fewer than two columns.
    """
    row = []
    col = []
    data = []
    # ``with`` guarantees the handle is closed; newline="" is the mode the
    # csv module expects for files it reads.
    with open(data_path, "r", encoding="utf-8", newline="") as handle:
        for line in islice(csv.reader(handle), 1, None):
            if not line:
                continue
            row.append(int(line[0]))
            col.append(int(line[1]))
            data.append(1)
    return row, col, data
def write_res_to_csv(user_recommendation_res, output_path="submission_svd_gd.csv"):
    """Write the recommendations to a CSV file, one (user, item) pair per row.

    The file starts with the header ``user_id,item_id`` followed by one row
    for every item in every user's recommendation list, in dict order.

    Args:
        user_recommendation_res: Dict mapping a user id to an iterable of
            recommended item ids.
        output_path: Destination file; defaults to ``submission_svd_gd.csv``
            in the current working directory. An existing file is
            overwritten.
    """
    # ``with`` closes the file even if a write fails part-way through.
    with open(output_path, "w", encoding="utf-8", newline="") as handle:
        csv_writer = csv.writer(handle)
        csv_writer.writerow(["user_id", "item_id"])
        csv_writer.writerows(
            [str(user_id), str(item_id)]
            for user_id, recommendation_list in user_recommendation_res.items()
            for item_id in recommendation_list
        )
if __name__ == "__main__":
    # Input files: training interactions and the users to recommend for.
    train_data_path = "./dataset/book_train_dataset.csv"
    test_data_path = "./dataset/book_test_dataset.csv"

    # n:         number of items (10000)
    # k:         number of factors kept by the SVD decomposition
    # goods_num: how many items are recommended to each user
    n, k, goods_num = 10000, 10, 10

    row, col, data = load_train_data(train_data_path)
    print("训练数据加载完成...")

    # m: number of users, taken as one more than the largest user index.
    m = 1 + max(row)

    test_user_id_list = load_test_data(test_data_path)
    print("测试数据加载完成...")

    user_recommendation_list = recommendation(
        test_user_id_list, row, col, data, m, n, k, goods_num
    )
    print("用户物品推荐完成, 正在写入文件...")

    write_res_to_csv(user_recommendation_list)
    print("测试结果写入文件完成!")
此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
如您确认内容无涉及 不当用语 / 纯广告导流 / 暴力 / 低俗色情 / 侵权 / 盗版 / 虚假 / 无价值内容或违法国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。