使用 Surprise 框架为 MovieLens 数据集中的每个 user 推荐 Top-N 个 item

xiaoxiao2025-11-06  3

# Import the required libraries.
import os
from collections import defaultdict  # dict that creates a default value for missing keys

from surprise import Dataset
from surprise import Reader
from surprise import SVD
from surprise import accuracy
from surprise.model_selection import train_test_split

# `evaluate` and `print_perf` only exist in older Surprise releases; importing
# them unconditionally raises ImportError on newer ones. Nothing in this script
# uses them, so a failed import is deliberately ignored.
try:
    from surprise import evaluate, print_perf
except ImportError:
    pass

# --- Load the data ---
# Path of the ratings file.
file_path = os.path.expanduser('ml-100k/u.data')
# Tell the reader how each line of the file is laid out.
reader = Reader(line_format='user item rating timestamp', sep='\t')
# Load the dataset.
data = Dataset.load_from_file(file_path, reader=reader)

# Alternative: hold out 25% of the ratings as a test set.
# trainset, testset = train_test_split(data, test_size=.25)
trainset = data.build_full_trainset()

# SVD is used here; any other Surprise algorithm could be substituted.
algo = SVD()
# Train the model on the full trainset.
algo.fit(trainset)

# Predict every (user, item) pair that is NOT in the trainset.
# NOTE: with the default `fill=None`, the "true" rating of each anti-testset
# entry is the global mean rating of the trainset (not 0).
testset = trainset.build_anti_testset()
predictions = algo.test(testset)

# Compute RMSE. Because the anti-testset ratings are fill values rather than
# real ratings, this number is not a genuine accuracy measure.
accuracy.rmse(predictions)
# Output recorded in the original run:
#   RMSE: 0.6043
#   0.6042835704959628


def get_top_n(predictions, n=10):
    """Return the top-N recommendation for each user from a set of predictions.

    Args:
        predictions (list of Prediction objects): The list of predictions, as
            returned by the test method of an algorithm.
        n (int): The number of recommendation to output for each user.
            Default is 10.

    Returns:
        A dict where keys are user (raw) ids and values are lists of tuples:
        [(raw item id, rating estimation), ...] of size n (fewer if the user
        has fewer than n predictions), sorted by estimation, highest first.
    """
    # First group the predictions by user.
    top_n = defaultdict(list)
    for uid, iid, _true_r, est, _ in predictions:
        top_n[uid].append((iid, est))

    # Then sort each user's items by estimated rating and keep the n best.
    # Only existing keys are reassigned, so the dict size does not change
    # while it is being iterated.
    for uid, user_ratings in top_n.items():
        user_ratings.sort(key=lambda pair: pair[1], reverse=True)
        top_n[uid] = user_ratings[:n]

    return top_n


# Keep the result so it can be inspected or printed afterwards.
top_n = get_top_n(predictions, n=10)

#计算查准率 precision@k 和查全率 recall@k

转载请注明原文地址: https://www.6miu.com/read-5039150.html

最新回复(0)