import warnings

# Suppress every warning category for the rest of the session. The filter is
# installed before the remaining imports so it also applies to warnings
# emitted while those libraries are being loaded.
warnings.filterwarnings("ignore")
import pandas as pd
import numpy as np
import recmetrics
import matplotlib.pyplot as plt
from surprise import Reader,SVD,Dataset
from surprise.model_selection import train_test_split
# Batch test results: collapse the test ratings to one row per userId whose
# 'actual' column holds the distinct movieIds found for that user.
# Named aggregation is used because the dict-renaming form
# `.agg({'actual': func})` on a SeriesGroupBy raises SpecificationError in
# current pandas releases.
test = test.copy().groupby('userId')['movieId'].agg(actual=lambda x: list(set(x)))

# Collect the collaborative-filtering recommendations for every user in the
# test index (the literal 10 is presumably the number of items to recommend;
# confirm against get_users_predictions).
cf_recs = []
for user in test.index:
    cf_predictions = get_users_predictions(user, 10, cf_model)
    cf_recs.append(cf_predictions)

test['cf_predictions'] = cf_recs
test.head()
# Output of test.head():
# userId | actual | cf_predictions
# 156 | [6, 2056, 10, 15, 17, 4117, 22, 23, 24, 2073, ... | [2028, 2762, 1198, 1704, 1242, 593, 1210, 919,...
# 208 | [3072, 1, 69122, 2567, 3079, 2570, 44555, 1036... | [912, 608, 924, 1207, 898, 922, 1256, 44555, 7...
# 359 | [1, 32770, 515, 39427, 2565, 37382, 4103, 6964... | [1272, 953, 2804, 2762, 2918, 1207, 1233, 1201...
# 394 | [1537, 33794, 26116, 3077, 4617, 2058, 2571, 5... | [858, 922, 2019, 608, 5291, 1228, 3435, 1219, ...
# 572 | [3, 108548, 7173, 4104, 54281, 91658, 2571, 30... | [2571, 50, 589, 79132, 47, 58559, 7361, 2959, ...
# Experimental comparison (实验对比)
# Popularity baseline (leaderboard): the 10 movieIds with the most rows in
# `ratings`, recommended unchanged to every user.
popularity_recs = ratings.movieId.value_counts().head(10).index.tolist()

pop_recs = []
for user in test.index:
    # Every user receives the same popularity list (the same list object).
    pop_predictions = popularity_recs
    pop_recs.append(pop_predictions)

test['pop_predictions'] = pop_recs
test.head()
# Output of test.head():
# userId | actual | cf_predictions | pop_predictions
# 156 | [6, 2056, 10, 15, 17, 4117, 22, 23, 24, 2073, ... | [2028, 2762, 1198, 1704, 1242, 593, 1210, 919,... | [1198, 1270, 593, 2762, 318, 2571, 260, 1240, ...
# 208 | [3072, 1, 69122, 2567, 3079, 2570, 44555, 1036... | [912, 608, 924, 1207, 898, 922, 1256, 44555, 7... | [1198, 1270, 593, 2762, 318, 2571, 260, 1240, ...
# 359 | [1, 32770, 515, 39427, 2565, 37382, 4103, 6964... | [1272, 953, 2804, 2762, 2918, 1207, 1233, 1201... | [1198, 1270, 593, 2762, 318, 2571, 260, 1240, ...
# 394 | [1537, 33794, 26116, 3077, 4617, 2058, 2571, 5... | [858, 922, 2019, 608, 5291, 1228, 3435, 1219, ... | [1198, 1270, 593, 2762, 318, 2571, 260, 1240, ...
# 572 | [3, 108548, 7173, 4104, 54281, 91658, 2571, 30... | [2571, 50, 589, 79132, 47, 58559, 7361, 2959, ... | [1198, 1270, 593, 2762, 318, 2571, 260, 1240, ...
# Random baseline: for each user, draw 10 entries from the movieId column of
# `ratings`.
# NOTE(review): this samples rating rows, not distinct movies, so a movieId
# that occurs in many rows is more likely to be drawn and could appear more
# than once in a single draw -- confirm this is the intended baseline.
ran_recs = []
for user in test.index:
    random_predictions = ratings.movieId.sample(10).values.tolist()
    ran_recs.append(random_predictions)

test['random_predictions'] = ran_recs
test.head()
# Output of test.head():
# userId | actual | cf_predictions | pop_predictions | random_predictions
# 156 | [6, 2056, 10, 15, 17, 4117, 22, 23, 24, 2073, ... | [2028, 2762, 1198, 1704, 1242, 593, 1210, 919,... | [1198, 1270, 593, 2762, 318, 2571, 260, 1240, ... | [6310, 88125, 7458, 3182, 60684, 5582, 2975, 1...
# 208 | [3072, 1, 69122, 2567, 3079, 2570, 44555, 1036... | [912, 608, 924, 1207, 898, 922, 1256, 44555, 7... | [1198, 1270, 593, 2762, 318, 2571, 260, 1240, ... | [3823, 2109, 6924, 5120, 4238, 1407, 5266, 299...
# 359 | [1, 32770, 515, 39427, 2565, 37382, 4103, 6964... | [1272, 953, 2804, 2762, 2918, 1207, 1233, 1201... | [1198, 1270, 593, 2762, 318, 2571, 260, 1240, ... | [57421, 1032, 8379, 2539, 5010, 2100, 111, 196...
# 394 | [1537, 33794, 26116, 3077, 4617, 2058, 2571, 5... | [858, 922, 2019, 608, 5291, 1228, 3435, 1219, ... | [1198, 1270, 593, 2762, 318, 2571, 260, 1240, ... | [77201, 1372, 7484, 3250, 521, 1396, 5971, 260...
# 572 | [3, 108548, 7173, 4104, 54281, 91658, 2571, 30... | [2571, 50, 589, 79132, 47, 58559, 7361, 2959, ... | [1198, 1270, 593, 2762, 318, 2571, 260, 1240, ... | [5872, 6982, 832, 4495, 70742, 65596, 1, 971, ...
# Top-K precision and recall (topK 求精度与召回率, Precision 与 Recall)
# Coverage (覆盖率)
def prediction_coverage(predicted, catalog):
    """Return the share of the catalog reached by the predictions, in percent.

    Args:
        predicted: Iterable of iterables of hashable items (for example one
            recommendation list per user).
        catalog: Sized collection; only its length is used.

    Returns:
        The number of distinct items across all of ``predicted`` divided by
        ``len(catalog)``, multiplied by 100 and rounded to two decimals.
        Every distinct predicted item is counted, whether or not it occurs
        in ``catalog``.

    Raises:
        ZeroDivisionError: If ``catalog`` is empty.
    """
    # A set comprehension flattens and de-duplicates in a single pass.
    unique_predictions = {p for sublist in predicted for p in sublist}
    return round(len(unique_predictions) / len(catalog) * 100, 2)