import os
from operator import itemgetter

from surprise import Dataset
from surprise import Reader
from surprise import SVD
from surprise import accuracy
from surprise.model_selection import train_test_split
# NOTE(review): `evaluate` and `print_perf` appear to have been removed from
# newer Surprise releases (superseded by `model_selection.cross_validate`) --
# confirm against the installed version before relying on this import.
from surprise import evaluate, print_perf
# Load the ratings file. Each line is tab-separated and holds, in order:
# user, item, rating, timestamp (presumably the MovieLens 100k `u.data` file,
# judging by the path -- confirm).
file_path = os.path.expanduser('ml-100k/u.data')
reader = Reader(line_format='user item rating timestamp', sep='\t')
data = Dataset.load_from_file(file_path, reader=reader)

# Train SVD on every rating in the dataset (no hold-out split is made here).
trainset = data.build_full_trainset()
algo = SVD()
algo.fit(trainset)

# Predict a rating for the pairs produced by `build_anti_testset()`; per the
# Surprise docs these are the (user, item) pairs absent from the trainset.
testset = trainset.build_anti_testset()
predictions = algo.test(testset)

# NOTE(review): per the Surprise docs, anti-testset entries carry a fill value
# (the global mean by default) instead of a real rating, so this RMSE likely
# measures distance from that fill value rather than true accuracy -- confirm
# whether a held-out split (e.g. `train_test_split`) was intended.
accuracy.rmse(predictions)
# Notebook output of the RMSE call above:
# RMSE: 0.6043
# 0.6042835704959628
from collections import defaultdict


def get_top_n(predictions, n=10):
    """Return the top-N recommendations for each user from a set of predictions.

    Args:
        predictions (list of Prediction objects): The list of predictions, as
            returned by the test method of an algorithm. Each entry must
            unpack into five fields, in the order
            (user id, item id, true rating, estimated rating, details).
        n (int): The number of recommendations to output for each user.
            Default is 10.

    Returns:
        A dict where keys are user (raw) ids and values are lists of tuples:
        [(raw item id, rating estimation), ...] of size at most n, sorted by
        estimated rating from highest to lowest.
    """
    # Group every (item, estimate) pair under the user it was predicted for.
    top_n = defaultdict(list)
    for uid, iid, _true_r, est, _details in predictions:
        top_n[uid].append((iid, est))

    # Keep only the n best-rated items per user. `sorted` is stable, so items
    # with equal estimates keep the order in which they were encountered.
    # Re-assigning existing keys while iterating is safe: the dict's size
    # does not change.
    for uid, user_ratings in top_n.items():
        top_n[uid] = sorted(user_ratings, key=itemgetter(1), reverse=True)[:n]

    return top_n
# Rank the predictions per user and keep the 10 best items for each. The
# return value is not bound to a name here (notebook-style cell expression).
get_top_n(predictions, n=10)
# Compute precision@k and recall@k