1 Star 3 Fork 0

ydsungan/图书推荐系统

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
item_cf.py 4.26 KB
一键复制 编辑 原始数据 按行查看 历史
ydsungan 提交于 2021-11-23 15:01 . 1123 backup
import csv
from itertools import islice
import numpy as np
import math
def item_similarity(train_data, max_id):
    """Compute the item-item similarity matrix for item-based CF.

    The similarity of items ``i`` and ``j`` is the number of users who liked
    both, divided by ``sqrt(n_i * n_j)`` where ``n_x`` is the number of users
    who liked item ``x``.

    Args:
        train_data: iterable of per-user lists of liked item ids (ints in
            ``0..max_id``).
        max_id: largest item id; the result has shape
            ``(max_id + 1, max_id + 1)``.

    Returns:
        A float ``numpy.ndarray``. Rows/columns of items nobody liked are all
        zeros (never NaN), and the diagonal is zero.
    """
    size = max_id + 1
    co_counts = np.zeros([size, size])
    # item_like_num[i] is the number of users who like item i.
    item_like_num = np.zeros(size)

    for user_like_list in train_data:
        # Count each item once per user: a repeated row must not inflate the
        # "number of users" counters. dict.fromkeys keeps the original order.
        # An empty list simply contributes nothing.
        items = list(dict.fromkeys(user_like_list))
        for position, item_a in enumerate(items):
            item_like_num[item_a] += 1
            for item_b in items[position + 1:]:
                co_counts[item_a, item_b] += 1
                co_counts[item_b, item_a] += 1

    # co_counts[i][j] is now the number of users who like both i and j.
    # Normalise it; where either item has no likes the denominator is 0, so
    # leave the similarity at 0 instead of producing NaN (NaN sorts last in
    # argsort and would be picked up as the "most similar" item).
    norm = np.sqrt(np.outer(item_like_num, item_like_num))
    return np.divide(co_counts, norm, out=np.zeros_like(co_counts), where=norm > 0)
def recommendation(test_user_id_list, train_user_id_list, users_like_list, similarity, K, top_n=10):
    """Recommend items to each test user from the items they already like.

    For every item a user likes, the ``K`` most similar items are looked up in
    ``similarity`` and their similarity values are accumulated per candidate
    item. The ``top_n`` candidates with the highest accumulated score are
    returned.

    Args:
        test_user_id_list: user ids to produce recommendations for.
        train_user_id_list: user ids, aligned by position with
            ``users_like_list``.
        users_like_list: per-user lists of liked item ids.
        similarity: square item-item similarity matrix (``numpy.ndarray``).
        K: number of nearest items considered per liked item.
        top_n: number of items to recommend per user (default 10).

    Returns:
        dict mapping each test user id to a list of recommended item ids
        (plain ints), best first. A user id that does not appear in
        ``train_user_id_list`` maps to an empty list.
    """
    # Map user id -> position once instead of calling list.index per user.
    # setdefault keeps the first occurrence, which is what list.index returns.
    user_position = {}
    for position, train_user_id in enumerate(train_user_id_list):
        user_position.setdefault(train_user_id, position)

    neighbours = max(K, 0)
    user_recommendation_list = {}
    for user_id in test_user_id_list:
        position = user_position.get(user_id)
        if position is None:
            # No training history for this user: nothing to base a
            # recommendation on.
            user_recommendation_list[user_id] = []
            continue

        recommendation_score = {}
        for liked_item_id in users_like_list[position]:
            row = similarity[liked_item_id]
            # Descending order, then take the first K. Unlike [-K:], this
            # yields nothing (not the whole row) when K is 0.
            topk_similar = row.argsort()[::-1][:neighbours]
            for similar_item_id in topk_similar:
                item_key = int(similar_item_id)
                recommendation_score[item_key] = recommendation_score.get(item_key, 0) + row[item_key]

        ranked = sorted(recommendation_score.items(), key=lambda pair: pair[1], reverse=True)
        user_recommendation_list[user_id] = [item_key for item_key, _ in ranked[:top_n]]
    return user_recommendation_list
def load_test_data(data_path):
    """Read the test CSV and return the user ids from its first column.

    The first row is treated as a header and skipped. Blank rows are ignored.

    Args:
        data_path: path to a UTF-8 encoded CSV file.

    Returns:
        list of int user ids in file order.
    """
    # The context manager guarantees the handle is closed; newline="" is the
    # setting the csv module asks for when reading.
    with open(data_path, "r", encoding="utf-8", newline="") as handle:
        rows = islice(csv.reader(handle), 1, None)
        return [int(row[0]) for row in rows if row]
def load_train_data(data_path):
    """Read the training CSV and group liked items per user.

    The first row is treated as a header and skipped; each following row is
    ``user_id,item_id``. Consecutive rows with the same user id are collected
    into one list, so the file is expected to be grouped by user.

    Args:
        data_path: path to a UTF-8 encoded CSV file.

    Returns:
        A tuple ``(users_like_list, train_user_id_list, max_id)`` where
        ``users_like_list[i]`` holds the item ids liked by
        ``train_user_id_list[i]`` and ``max_id`` is the largest item id seen
        in the whole file (0 if there are no data rows). Both lists are empty
        when the file has no data rows.
    """
    users_like_list = []
    train_user_id_list = []
    max_id = 0
    current_user_id = None

    with open(data_path, "r", encoding="utf-8", newline="") as handle:
        for line in islice(csv.reader(handle), 1, None):
            if not line:
                continue  # tolerate blank rows
            user_id = int(line[0])
            item_id = int(line[1])
            if user_id != current_user_id:
                # A new run of rows starts: open a fresh list for this user,
                # whatever the first user id in the file happens to be.
                current_user_id = user_id
                train_user_id_list.append(user_id)
                users_like_list.append([])
            users_like_list[-1].append(item_id)
            # Track the maximum on every row so the last user's items are
            # covered too.
            if item_id > max_id:
                max_id = item_id

    return users_like_list, train_user_id_list, max_id
def write_res_to_csv(user_recommendation_res, output_path="submission_v1.csv"):
    """Write recommendations to a CSV file, one ``user_id,item_id`` row per item.

    Args:
        user_recommendation_res: dict mapping a user id to the list of item
            ids recommended to that user.
        output_path: destination file; defaults to ``submission_v1.csv`` in
            the current working directory.
    """
    # The context manager closes the file even if a write fails part-way.
    with open(output_path, "w", encoding="utf-8", newline="") as handle:
        csv_writer = csv.writer(handle)
        csv_writer.writerow(["user_id", "item_id"])
        for user_id, recommendation_list in user_recommendation_res.items():
            csv_writer.writerows(
                [str(user_id), str(item_id)] for item_id in recommendation_list
            )
if __name__ == "__main__":
    # Script entry point: load data, build the similarity matrix, recommend,
    # and write the submission file. The dataset locations and K can be
    # overridden on the command line; without arguments the original
    # hard-coded values are used.
    import argparse

    parser = argparse.ArgumentParser(
        description="Item-based collaborative filtering book recommender."
    )
    parser.add_argument(
        "--train",
        default="C:\\Users\\ydsun\\Downloads\\book_train_dataset.csv",
        help="path to the training CSV (user_id,item_id rows)",
    )
    parser.add_argument(
        "--test",
        default="C:\\Users\\ydsun\\Downloads\\book_test_dataset.csv",
        help="path to the test CSV (user ids in the first column)",
    )
    # K = 10 means: for each item a user likes, look up its 10 most similar
    # items.
    parser.add_argument(
        "-k",
        type=int,
        default=10,
        help="number of most-similar items considered per liked item",
    )
    args = parser.parse_args()

    train_data_path = args.train
    test_data_path = args.test
    K = args.k

    users_like_list, train_user_id_list, max_id = load_train_data(train_data_path)
    print("训练数据读取完成...")
    similarity = item_similarity(users_like_list, max_id)
    print("物品相似度计算完成...")
    test_user_id_list = load_test_data(test_data_path)
    print("测试user_id读取完成...")
    user_recommendation_res = recommendation(
        test_user_id_list, train_user_id_list, users_like_list, similarity, K
    )
    print("完成推荐,正在写入文件...")
    write_res_to_csv(user_recommendation_res)
    print("submission文件生成.")
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/ydsungan/book-recommendation-system.git
git@gitee.com:ydsungan/book-recommendation-system.git
ydsungan
book-recommendation-system
图书推荐系统
master

搜索帮助

0d507c66 1850385 C8b1a773 1850385