-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrecommendations.py
More file actions
150 lines (133 loc) · 4.6 KB
/
recommendations.py
File metadata and controls
150 lines (133 loc) · 4.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
# Sample preference data: each top-level key is a person, and the value is
# a dictionary mapping the titles that person rated to a numeric score.
critics = {
    'Claudia Puig': {
        'Just My Luck': 3.0,
        'Snakes on a Plane': 3.5,
        'Superman Returns': 4.0,
        'The Night Listener': 4.5,
        'You, Me, and Dupree': 2.5,
    },
    'Gene Seymour': {
        'Just My Luck': 1.5,
        'Lady in the Water': 3.0,
        'Snakes on a Plane': 3.5,
        'Superman Returns': 5.0,
        'The Night Listener': 3.0,
        'You, Me, and Dupree': 3.5,
    },
    'Jack Mathews': {
        'Lady in the Water': 3.0,
        'Snakes on a Plane': 4.0,
        'Superman Returns': 5.0,
        'The Night Listener': 3.0,
        'You, Me, and Dupree': 3.5,
    },
    'Lisa Rose': {
        'Just My Luck': 3.0,
        'Lady in the Water': 2.5,
        'Snakes on a Plane': 3.5,
        'Superman Returns': 3.5,
        'The Night Listener': 3.0,
        'You, Me, and Dupree': 2.5,
    },
    'Michael Phillips': {
        'Lady in the Water': 2.5,
        'Snakes on a Plane': 3.0,
        'Superman Returns': 3.5,
        'The Night Listener': 4.0,
    },
    'Mick LaSalle': {
        'Just My Luck': 2.0,
        'Lady in the Water': 3.0,
        'Snakes on a Plane': 4.0,
        'Superman Returns': 3.0,
        'The Night Listener': 3.0,
        'You, Me, and Dupree': 2.0,
    },
    'Toby': {
        'Snakes on a Plane': 4.5,
        'Superman Returns': 4.0,
        'You, Me, and Dupree': 1.0,
    },
    'Ali': {
        'Just My Luck': 2.0,
        'Snakes on a Plane': 2.5,
        'Superman Returns': 2.5,
        'The Night Listener': 5.0,
    },
}
from math import sqrt
def sim_distance(prefs, person1, person2):
    """Return a distance-based similarity score for two people.

    Only items rated by both ``prefs[person1]`` and ``prefs[person2]`` are
    compared.  The score is ``1 / (1 + d)`` where ``d`` is the Euclidean
    distance between the two sets of ratings for those shared items, so
    identical ratings give 1.0.  Returns 0 when there are no shared items.
    """
    # Walk person1's items so the summation order follows their dictionary.
    shared = [title for title in prefs[person1] if title in prefs[person2]]
    if not shared:
        return 0

    mine, theirs = prefs[person1], prefs[person2]
    squared_gaps = [(mine[title] - theirs[title]) ** 2 for title in shared]
    return 1 / (1 + sqrt(sum(squared_gaps)))
def sim_pearson(prefs, p1, p2):
    """Return the Pearson correlation coefficient between p1 and p2.

    Args:
        prefs: dictionary mapping each person to a dictionary of
            ``{item: rating}``.
        p1: key of the first person in ``prefs``.
        p2: key of the second person in ``prefs``.

    Returns:
        The correlation of the two people's ratings over the items they
        have both rated.  Returns 0 when they share no items, or when the
        correlation is undefined because either person's shared ratings
        have no variance.
    """
    shared = [it for it in prefs[p1] if it in prefs[p2]]
    if not shared:
        return 0

    # Divide by a float so integer ratings do not trigger floor division
    # on Python 2.
    n = float(len(shared))

    ratings1 = [prefs[p1][it] for it in shared]
    ratings2 = [prefs[p2][it] for it in shared]

    sum1 = sum(ratings1)
    sum2 = sum(ratings2)
    sum1_sq = sum([r ** 2 for r in ratings1])
    sum2_sq = sum([r ** 2 for r in ratings2])
    p_sum = sum([r1 * r2 for r1, r2 in zip(ratings1, ratings2)])

    num = p_sum - (sum1 * sum2 / n)
    var1 = sum1_sq - sum1 ** 2 / n
    var2 = sum2_sq - sum2 ** 2 / n

    # Mathematically both terms are >= 0, but floating-point rounding can
    # push a zero variance slightly negative.  Treat that as "no variance"
    # instead of letting sqrt() raise ValueError on a negative product (or
    # silently accept the positive product of two negative terms).
    if var1 <= 0 or var2 <= 0:
        return 0

    den = sqrt(var1 * var2)
    if den == 0:
        return 0
    return num / den
def topMatches(prefs, person, n=3, similarity=sim_pearson):
    """Return the n entries of prefs most similar to person.

    Every other key in ``prefs`` is scored against ``person`` with the
    ``similarity`` function, and the score is rounded to two decimals.
    The result is a list of ``(score, other)`` tuples ordered from the
    highest score to the lowest, truncated to the first ``n`` entries.
    """
    scored = []
    for candidate in prefs:
        if candidate == person:
            continue
        closeness = round(similarity(prefs, person, candidate), 2)
        scored.append((closeness, candidate))

    # Ascending sort followed by a reversal puts the best matches first.
    best_first = sorted(scored)[::-1]
    return best_first[0:n]
def getRecommendations(prefs, person, similarity = sim_pearson):
    """Recommend items for person using a similarity-weighted average.

    Args:
        prefs: dictionary mapping each person to ``{item: rating}``.
        person: key in ``prefs`` to produce recommendations for.
        similarity: function ``(prefs, a, b) -> number`` used to weight
            each other person's ratings.

    Returns:
        A list of ``(predicted_rating, item)`` tuples, highest prediction
        first, with predictions rounded to two decimals.  Only items that
        ``person`` has not rated (or has rated 0) are included.
    """
    totals = {}
    simSums = {}
    for other in prefs:
        # Never compare the person with themself: the self-similarity
        # would add a full-weight vote of 0 for every item they rated 0,
        # dragging those predictions down.
        if other == person:
            continue
        sim = similarity(prefs, person, other)
        # Ignore people who are not positively similar.
        if sim <= 0:
            continue
        for item in prefs[other]:
            # Only score items the person has not rated yet; a rating of 0
            # is treated as "not rated".
            if item not in prefs[person] or prefs[person][item] == 0:
                # Running sum of rating * similarity ...
                totals.setdefault(item, 0)
                totals[item] += prefs[other][item] * sim
                # ... and of the similarities, used to normalise below.
                simSums.setdefault(item, 0)
                simSums[item] += sim

    # simSums[item] is a sum of strictly positive similarities, so the
    # division below cannot hit zero.
    rankings = [(round(total / simSums[item], 2), item)
                for item, total in totals.items()]
    rankings.sort()
    rankings.reverse()
    return rankings
def transformPrefs(prefs):
    """Swap the two levels of a nested preference dictionary.

    Given ``{person: {item: rating}}``, return a new dictionary shaped
    ``{item: {person: rating}}``.  The input is not modified.
    """
    flipped = {}
    for rater, ratings in prefs.items():
        for title, score in ratings.items():
            # Create the per-item dictionary on first sight of the title.
            flipped.setdefault(title, {})[rater] = score
    return flipped
def calculateSimilarItems(prefs,n=10):
    """Build a table of the items most similar to each item.

    Args:
        prefs: dictionary mapping each person to ``{item: rating}``.
        n: maximum number of similar items to keep for each item.

    Returns:
        A dictionary mapping every item to the list of ``(score, item)``
        tuples returned by ``topMatches`` using ``sim_distance``.
    """
    result = {}
    # Invert the preference matrix to be item-centric
    itemPrefs = transformPrefs(prefs)
    total = len(itemPrefs)
    for c, item in enumerate(itemPrefs, 1):
        # Status updates for large datasets.  The parenthesised form with
        # a single pre-formatted string prints the same on Python 2 and 3.
        if c % 100 == 0:
            print("%d / %d" % (c, total))
        # Find the most similar items to this one
        result[item] = topMatches(itemPrefs, item, n=n, similarity=sim_distance)
    return result
def getRecommendedItems(prefs,itemMatch,user):
    """Recommend items for user from a precomputed item-similarity table.

    Args:
        prefs: dictionary mapping each person to ``{item: rating}``.
        itemMatch: dictionary mapping each item to a list of
            ``(similarity, other_item)`` tuples.
        user: key in ``prefs`` to produce recommendations for.

    Returns:
        A list of ``(predicted_rating, item)`` tuples, highest prediction
        first, with predictions rounded to two decimals.  Items the user
        has already rated are excluded, as are items whose accumulated
        similarity is zero (no usable evidence to predict a rating).
    """
    userRatings = prefs[user]
    scores = {}
    totalSim = {}
    # Loop over items rated by this user
    for (item, rating) in userRatings.items():
        # Loop over items similar to this one
        for (similarity, item2) in itemMatch[item]:
            # Ignore if this user has already rated this item
            if item2 in userRatings:
                continue
            # Weighted sum of rating times similarity
            scores[item2] = scores.get(item2, 0) + similarity * rating
            # Sum of all the similarities
            totalSim[item2] = totalSim.get(item2, 0) + similarity

    # Divide each total score by total weighting to get an average.  An
    # item whose similarities sum to zero would divide by zero, so it is
    # left out of the rankings.
    rankings = [(round(score / totalSim[item], 2), item)
                for item, score in scores.items()
                if totalSim[item] != 0]
    # Return the rankings from highest to lowest
    rankings.sort()
    rankings.reverse()
    return rankings