# [Code-hosting page notice, not part of the program] Code fetch complete; the page will refresh automatically.
import sparsesvd
import scipy
import csv
from itertools import islice
import numpy as np
import math
def recommendation(test_user_id_list, row, col, data, m, n, k, goods_num):
    """Train an SVD-initialised latent-factor model and return top-N items per user.

    The (row, col, data) triplets are assembled into an ``m x n`` sparse matrix,
    which is decomposed with ``sparsesvd``. The resulting user/item factors are
    refined with a few epochs of stochastic gradient descent against a dense
    target matrix, and every requested user then gets the ``goods_num``
    highest-scoring item indices.

    Args:
        test_user_id_list: User indices (rows of the matrix) to produce
            recommendations for.
        row: Row (user) index of every stored entry.
        col: Column (item) index of every stored entry.
        data: Value of every stored entry.
        m: Number of matrix rows (users).
        n: Number of matrix columns (items).
        k: Number of factors requested from ``sparsesvd``.
        goods_num: Number of item indices to keep per user.

    Returns:
        Dict mapping each user id in ``test_user_id_list`` to a list of item
        indices, ordered by descending predicted score (ties keep ascending
        item index).

    Raises:
        IndexError: If a user id in ``test_user_id_list`` is outside ``[-m, m)``.
    """
    # Imported here so the ``sparse`` submodule is loaded explicitly instead of
    # relying on another package having imported it as a side effect.
    from scipy.sparse import coo_matrix

    learning_rate = 0.05
    regularization = 0.01
    num_epochs = 6

    coo_mat = coo_matrix((data, (row, col)), shape=(m, n))
    csc_mat = coo_mat.tocsc()
    ut, s, vt = sparsesvd.sparsesvd(csc_mat, k)

    # One row of latent factors per user / per item. Copies are taken so that
    # training never writes into the arrays returned by sparsesvd.
    user_factors = np.array(ut.T, dtype=float)  # shape (m, factors)
    item_factors = np.array(vt.T, dtype=float)  # shape (n, factors)
    print("数据准备完成..")

    # Dense training target, one row per user.
    # NOTE(review): as in the original code, the target is U @ Vt WITHOUT the
    # singular values ``s``; the factors therefore start at (almost) zero error
    # and the SGD below mostly applies weight decay. Confirm whether
    # U @ diag(s) @ Vt or the raw interaction matrix was the intended target.
    target = np.zeros([m, n])
    items_by_factor = item_factors.T.copy()  # frozen snapshot, shape (factors, n)
    for i in range(m):
        target[i] = user_factors[i] @ items_by_factor
        if (i + 1) % 100 == 0:
            print("计算rui进度:{:.2f}%".format((i + 1) / m * 100))

    for epoch in range(num_epochs):
        loss = 0.0
        for i in range(m):
            pu = user_factors[i].copy()
            for j in range(n):
                qi = item_factors[j]
                err = target[i, j] - pu @ qi
                # Squared error: signed errors would cancel each other out.
                loss += err * err
                # Both factors are 1-D vectors of equal length, so the updates
                # keep their shape (the original mixed a (1, k) row with a
                # (k, 1) column, which broadcast to a (k, k) matrix).
                # The item vector is updated first and the user update uses the
                # refreshed item vector, matching the original statement order.
                qi_new = qi + learning_rate * (err * pu - regularization * qi)
                pu = pu + learning_rate * (err * qi_new - regularization * pu)
                item_factors[j] = qi_new
            user_factors[i] = pu
        learning_rate *= 0.9
        # Reported after the epoch has finished, hence ``epoch + 1``.
        print("梯度下降进度:{:.2f}%,损失:{}".format((epoch + 1) / num_epochs * 100, loss))

    user_recommendation_list = {}
    total_users = len(test_user_id_list)
    for count, user_id in enumerate(test_user_id_list):
        # Inner product of the user vector with every item vector -> one score
        # per item (the original took element [0][0] of an outer product).
        scores = item_factors @ user_factors[user_id]
        # Stable sort on the negated scores: descending score, ties resolved by
        # ascending item index.
        best_items = np.argsort(-scores, kind="stable")[:goods_num]
        user_recommendation_list[user_id] = best_items.tolist()
        if count % 100 == 0:
            print("当前推荐进度: {:.2f}%".format(count / total_users * 100))
    return user_recommendation_list
def load_test_data(data_path):
    """Read the user ids to generate recommendations for from a CSV file.

    The first row is treated as a header and skipped. For every remaining
    non-empty row the first column is parsed as an integer user id.

    Args:
        data_path: Path of a UTF-8 encoded CSV file.

    Returns:
        List of integer user ids in file order.

    Raises:
        ValueError: If a first-column value cannot be parsed as an integer.
    """
    # ``with`` guarantees the handle is closed; ``newline=""`` is what the csv
    # module expects for files it reads.
    with open(data_path, "r", encoding="utf-8", newline="") as handle:
        reader = csv.reader(handle)
        # Blank lines come back from csv.reader as empty rows; skip them
        # instead of failing on ``line[0]``.
        return [int(line[0]) for line in islice(reader, 1, None) if line]
def load_train_data(data_path):
    """Read (user, item) interaction pairs from a CSV file.

    The first row is treated as a header and skipped. For every remaining
    non-empty row, column 0 is parsed as the integer row (user) index and
    column 1 as the integer column (item) index; each pair gets the value 1.

    Args:
        data_path: Path of a UTF-8 encoded CSV file.

    Returns:
        Tuple ``(row, col, data)`` of three equally long lists: row indices,
        column indices and the constant value 1 for every pair.

    Raises:
        ValueError: If an index value cannot be parsed as an integer.
        IndexError: If a non-empty row has fewer than two columns.
    """
    row = []
    col = []
    data = []
    # ``with`` guarantees the handle is closed even when parsing fails.
    with open(data_path, "r", encoding="utf-8", newline="") as handle:
        for line in islice(csv.reader(handle), 1, None):
            if not line:
                # Blank line in the file; nothing to record.
                continue
            row.append(int(line[0]))
            col.append(int(line[1]))
            data.append(1)
    return row, col, data
def write_res_to_csv(user_recommendation_res, output_path="submission_lfm.csv"):
    """Write the recommendations as one ``user_id,item_id`` CSV row per item.

    Args:
        user_recommendation_res: Mapping of user id to the list of item ids
            recommended for that user.
        output_path: Destination file; defaults to ``submission_lfm.csv`` in
            the current working directory. An existing file is overwritten.
    """
    # ``with`` closes the file even if a write fails part-way through.
    with open(output_path, "w", encoding="utf-8", newline="") as handle:
        csv_writer = csv.writer(handle)
        csv_writer.writerow(["user_id", "item_id"])
        for user_id, recommendation_list in user_recommendation_res.items():
            csv_writer.writerows(
                [str(user_id), str(item_id)] for item_id in recommendation_list
            )
if __name__ == "__main__":
    # Run configuration:
    #   n         - number of items, fixed at 10000
    #   goods_num - forwarded to recommendation() as ``goods_num`` (10)
    #   k         - number of factors requested from the SVD
    #   m         - number of users, derived below from the training data
    goods_num, k, n = 10, 10, 10000
    test_data_path = "./dataset/book_test_dataset.csv"
    train_data_path = "./dataset/book_train_dataset.csv"

    row, col, data = load_train_data(train_data_path)
    print("训练数据加载完成...")
    # User ids are used as zero-based row indices, so the row count is the
    # largest id seen in the training data plus one.
    m = 1 + max(row)

    test_user_id_list = load_test_data(test_data_path)
    print("测试数据加载完成...")

    user_recommendation_list = recommendation(
        test_user_id_list=test_user_id_list,
        row=row,
        col=col,
        data=data,
        m=m,
        n=n,
        k=k,
        goods_num=goods_num,
    )
    print("用户物品推荐完成, 正在写入文件...")

    write_res_to_csv(user_recommendation_list)
    print("测试结果写入文件完成!")
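# [Code-hosting page notice, not part of the program] This section may contain content unsuitable for display, so the page does not show it. You can review and modify it yourself using the relevant editing features.
# [Code-hosting page notice, not part of the program] If you confirm the content involves no inappropriate language, pure advertising or traffic diversion, violence, vulgar or pornographic material, infringement, piracy, false or worthless content, or anything that violates applicable national laws and regulations, you can click submit to file an appeal and we will handle it as soon as possible.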