# [web page notice, not part of the program] Code pull complete; the page will refresh automatically.
import csv
from itertools import islice
import numpy as np
import math
def item_similarity(train_data, max_id):
    """Build an item-item similarity matrix with a soft penalty on active users.

    Every user adds ``iuf = 1 / log(1 + n)`` (``n`` = number of items in that
    user's list, "inverse user frequency") to each pair of items in the list,
    so very active users contribute less per pair.  The accumulated weights
    are then normalized by ``sqrt(N(i) * N(j))``, where ``N(x)`` is how many
    times item ``x`` appears across all user lists.

    Args:
        train_data: Sequence of per-user lists of liked item ids.  Ids are
            used directly as matrix indices, so they must lie in
            ``[0, max_id]``.  Empty lists are skipped.
        max_id: Largest item id; the result has shape
            ``(max_id + 1, max_id + 1)``.

    Returns:
        numpy.ndarray: Symmetric similarity matrix.  Entries involving an
        item that nobody liked are ``0`` (never ``NaN``).
    """
    size = max_id + 1
    similarity = np.zeros([size, size])
    # item_like_num[x] counts how many like-records reference item x.
    item_like_num = np.zeros(size)
    total_users = len(train_data)

    for count, user_like_list in enumerate(train_data, start=1):
        # An empty list would give log(1) == 0 and a division by zero; it
        # also carries no co-occurrence information, so skip it.
        if user_like_list:
            iuf = 1 / math.log(1 + len(user_like_list))
            for i, item_i in enumerate(user_like_list):
                item_like_num[item_i] += 1
                for item_j in user_like_list[i + 1:]:
                    similarity[item_i][item_j] += iuf
                    similarity[item_j][item_i] += iuf
        if count % 100 == 0:
            print("物品相似度计算进度: {:.2f}%".format(count / total_users * 100))

    # Normalize only where both items have at least one like.  Dividing the
    # remaining (all-zero) cells by zero would yield NaN, and NaN sorts last
    # in argsort(), which would make never-liked ids look like the most
    # similar items to callers that pick the top entries of a row.
    denominator = np.sqrt(np.outer(item_like_num, item_like_num))
    np.divide(similarity, denominator, out=similarity, where=denominator > 0)
    return similarity
def recommendation(test_user_id_list, train_user_id_list, users_like_list, similarity, K):
    """Recommend up to 10 items for each test user from item similarities.

    For every item a user liked, the ``K`` most similar items (by the
    corresponding row of ``similarity``) each receive that similarity as a
    score; scores for the same candidate are summed across the user's liked
    items, and the 10 highest-scoring candidates are returned.

    Args:
        test_user_id_list: User ids to produce recommendations for.
        train_user_id_list: User ids seen in training; position ``p`` in this
            list corresponds to ``users_like_list[p]``.
        users_like_list: Per-user lists of liked item ids, aligned with
            ``train_user_id_list``.
        similarity: 2-D array indexed as ``similarity[item_a][item_b]``.
        K: Number of most-similar items considered per liked item.

    Returns:
        dict: Maps each test user id to a list of at most 10 item ids, best
        first.  A user id that does not occur in ``train_user_id_list`` maps
        to an empty list.
    """
    # Look-up table built once instead of a linear list.index() per user.
    # setdefault keeps the first occurrence, which is what list.index() finds.
    user_position = {}
    for position, train_user_id in enumerate(train_user_id_list):
        user_position.setdefault(train_user_id, position)

    user_recommendation_list = {}
    total_users = len(test_user_id_list)
    for count, user_id in enumerate(test_user_id_list):
        position = user_position.get(user_id)
        # Users without training history have nothing to score against.
        liked_items = users_like_list[position] if position is not None else []

        recommendation_score = {}
        for liked_item_id in liked_items:
            item_row = similarity[liked_item_id]
            # Indices of the K largest similarities, largest first.
            for similar_item_id in item_row.argsort()[-K:][::-1]:
                recommendation_score[similar_item_id] = (
                    recommendation_score.get(similar_item_id, 0)
                    + item_row[similar_item_id]
                )

        ranked = sorted(recommendation_score.items(), key=lambda pair: pair[1], reverse=True)
        user_recommendation_list[user_id] = [item_id for item_id, _ in ranked[:10]]

        if count % 100 == 0:
            print("当前推荐进度: {:.2f}%".format(count / total_users * 100))
    return user_recommendation_list
def load_test_data(data_path):
    """Read the test user ids from a CSV file.

    The first row is treated as a header and skipped.  The first column of
    every remaining non-empty row is parsed as an integer user id.

    Args:
        data_path: Path to a UTF-8 encoded CSV file.

    Returns:
        list[int]: User ids in file order (duplicates are kept).
    """
    # The context manager guarantees the handle is closed; newline="" is the
    # mode the csv module expects for files it reads.
    with open(data_path, "r", encoding="utf-8", newline="") as csv_file:
        data_rows = islice(csv.reader(csv_file), 1, None)
        # csv.reader yields [] for blank lines; skip them instead of failing
        # on row[0].
        return [int(row[0]) for row in data_rows if row]
def load_train_data(data_path):
    """Read (user id, item id) training rows and group the items per user.

    The first row is treated as a header and skipped.  In every remaining
    non-empty row, column 0 is parsed as an integer user id and column 1 as
    an integer item id.

    Args:
        data_path: Path to a UTF-8 encoded CSV file.

    Returns:
        tuple: ``(users_like_list, train_user_id_list, max_id)`` where
        ``users_like_list[p]`` is the list of item ids liked by
        ``train_user_id_list[p]`` (users in order of first appearance, items
        in file order) and ``max_id`` is the largest item id seen in any row
        (``0`` if there are no rows).
    """
    users_like_list = []
    train_user_id_list = []
    # Position of each user in the two parallel lists above, so rows of one
    # user end up in a single list even if they are not contiguous.
    position_of_user = {}
    max_id = 0

    with open(data_path, "r", encoding="utf-8", newline="") as csv_file:
        for row in islice(csv.reader(csv_file), 1, None):
            if not row:
                # csv.reader yields [] for blank lines.
                continue
            user_id = int(row[0])
            item_id = int(row[1])

            position = position_of_user.get(user_id)
            if position is None:
                position = len(users_like_list)
                position_of_user[user_id] = position
                train_user_id_list.append(user_id)
                users_like_list.append([])
            users_like_list[position].append(item_id)

            # Track the maximum on every row so the last user's items are
            # included as well.
            if item_id > max_id:
                max_id = item_id

    return users_like_list, train_user_id_list, max_id
def write_res_to_csv(user_recommendation_res, output_path="submission_v1.csv"):
    """Write recommendations to a CSV file, one (user, item) pair per row.

    The file starts with the header row ``user_id,item_id``.  Users with an
    empty recommendation list produce no rows.

    Args:
        user_recommendation_res: Mapping of user id to an iterable of
            recommended item ids.
        output_path: Destination file; defaults to ``submission_v1.csv`` in
            the current working directory.
    """
    # The context manager closes the file even if a write fails part-way.
    with open(output_path, "w", encoding="utf-8", newline="") as csv_file:
        csv_writer = csv.writer(csv_file)
        csv_writer.writerow(["user_id", "item_id"])
        for user_id, recommendation_list in user_recommendation_res.items():
            csv_writer.writerows(
                [str(user_id), str(item_id)] for item_id in recommendation_list
            )
if __name__ == "__main__":
    # Input files for the pipeline.
    train_csv = "./dataset/book_train_dataset.csv"
    test_csv = "./dataset/book_test_dataset.csv"

    # For every item a user liked, this many of its most similar items are
    # considered as recommendation candidates.
    neighbor_count = 10

    # Step 1: per-user liked-item lists, the matching user ids and the
    # largest item id.
    liked_items_per_user, known_user_ids, largest_item_id = load_train_data(train_csv)
    print("训练数据读取完成...")

    # Step 2: item-item similarity matrix.
    item_sim_matrix = item_similarity(liked_items_per_user, largest_item_id)
    print("物品相似度计算完成...")

    # Step 3: user ids that need recommendations.
    target_user_ids = load_test_data(test_csv)
    print("测试user_id读取完成...")

    # Step 4: score candidates and keep the best ones per user.
    recommendations_by_user = recommendation(
        target_user_ids,
        known_user_ids,
        liked_items_per_user,
        item_sim_matrix,
        neighbor_count,
    )
    print("完成推荐,正在写入文件...")

    # Step 5: persist the result.
    write_res_to_csv(recommendations_by_user)
    print("submission文件生成.")
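# [web page notice, not part of the program] This location may contain content that is unsuitable for display, so the page does not show it. You can review and modify it through the relevant editing functions.
# [web page notice, not part of the program] If you confirm the content does not involve inappropriate language / pure advertising / violence / vulgar or pornographic material / infringement / piracy / false or worthless content, or content that violates national laws and regulations, you can click submit to appeal and it will be handled as soon as possible.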