Skip to content

Commit 648c3b0

Browse files
committed
Merge branch 'emm_iters_exps'
- Publish final implementations used for ICML camera ready
2 parents 78e51ee + db77fe6 commit 648c3b0

19 files changed

Lines changed: 321 additions & 1122 deletions

.gitignore

Lines changed: 59 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,62 @@
1-
# ignore compilation files
1+
# Byte-compiled / optimized / DLL files
22
__pycache__/
33
*.py[cod]
44
*$py.class
5+
6+
# C extensions
7+
*.so
8+
9+
# Distribution / packaging
10+
.Python
11+
env/
12+
build/
13+
develop-eggs/
14+
dist/
15+
downloads/
16+
eggs/
17+
.eggs/
18+
lib/
19+
lib64/
20+
parts/
21+
sdist/
22+
var/
23+
*.egg-info/
24+
.installed.cfg
25+
*.egg
26+
27+
# PyInstaller
28+
# Usually these files are written by a python script from a template
29+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
30+
*.manifest
31+
*.spec
32+
33+
# Installer logs
34+
pip-log.txt
35+
pip-delete-this-directory.txt
36+
37+
# Unit test / coverage reports
38+
htmlcov/
39+
.tox/
40+
.coverage
41+
.coverage.*
42+
.cache
43+
nosetests.xml
44+
coverage.xml
45+
*,cover
46+
.hypothesis/
47+
48+
# Translations
49+
*.mo
50+
*.pot
51+
52+
# Django stuff:
53+
*.log
54+
55+
# Sphinx documentation
56+
docs/_build/
57+
58+
# PyBuilder
59+
target/
60+
61+
#Ipython Notebook
62+
.ipynb_checkpoints

README.md

Lines changed: 35 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1,34 +1,35 @@
1-
PrefPy
2-
======
3-
4-
Rank aggregation algorithms in the computer science field of computational social choice
5-
6-
7-
What's New
8-
==========
9-
10-
- Generalized method of moments algorithm for mixtures of Plackett-Luce models
11-
- Implementation of EMM algorithm for mixtures of Plackett-Luce by Gormley & Murphy
12-
13-
14-
Work In Progress
15-
================
16-
17-
- This is an initial version of the Python package form, further structural changes will be coming
18-
- Module naming conventions will be changed; currently the algorithm files take the initials of the names of the papers from which they originate (e.g. "gmmra" for Generalized Method of Moments for Rank Aggregation)
19-
- Mixture Model for Plackett-Luce EMM algorithm by Gormley & Murphy is forthcoming pending verification and testing of the method
20-
- Random utility model algorithms (verification of the implentation needs to be completed)
21-
22-
23-
Installation
24-
============
25-
26-
- Use of MATLAB optimization in this package requires Python 3.4 due to lack of support yet for Python 3.5 by the MATLAB Engine
27-
28-
Install by running setup.py with Python 3.4 (or greater) with the command
29-
30-
python3 setup.py install
31-
32-
Symlink install while developing to keep changes in the code instead with the command
33-
34-
python3 setup.py develop
1+
PrefPy
2+
======
3+
4+
Rank aggregation algorithms in the computer science field of computational social choice
5+
6+
7+
What's New
8+
==========
9+
10+
- Experiments and tests have been factored out of the repository (now located at https://github.com/pdpiech/prefpy-experiments)
11+
- Generalized method of moments algorithm for mixtures of Plackett-Luce models
12+
- Implementation of EMM algorithm for mixtures of Plackett-Luce by Gormley & Murphy
13+
14+
15+
Work In Progress
16+
================
17+
18+
- This is an initial version of the Python package form, further structural changes will be coming
19+
- Module naming conventions will be changed; currently the algorithm files take the initials of the names of the papers from which they originate (e.g. "gmmra" for Generalized Method of Moments for Rank Aggregation)
20+
- Mixture Model for Plackett-Luce EMM algorithm by Gormley & Murphy is forthcoming pending verification and testing of the method
21+
- Random utility model algorithms (verification of the implentation needs to be completed)
22+
23+
24+
Installation
25+
============
26+
27+
- Use of MATLAB optimization in this package requires Python 3.4 due to lack of support yet for Python 3.5 by the MATLAB Engine
28+
29+
Install by running setup.py with Python 3.4 (or greater) with the command
30+
31+
python3 setup.py install
32+
33+
Symlink install while developing to keep changes in the code instead with the command
34+
35+
python3 setup.py develop

prefpy/__init__.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
from . import aggregate
2+
from . import distributions
3+
from . import evbwie
4+
from . import gmm_mixpl_moments
5+
from . import gmm_mixpl_objectives
6+
from . import gmm_mixpl
7+
from . import gmmra
8+
from . import mmgbtl
9+
from . import plackettluce
10+
from . import stats
11+
from . import util
12+
13+
__all__ = ["aggregate", "distributions", "evbwie", "gmm_mixpl_moments",
14+
"gmm_mixpl_objectives", "gmm_mixpl", "gmmra", "mmgbtl",
15+
"plackettluce", "stats", "util"]

prefpy/evbwie.py

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,9 @@
33
# A Mixture Modeling Approach by Gormley and Murphy, 2008
44

55
import numpy as np
6-
import aggregate
7-
import plackettluce as pl
8-
import stats
6+
from . import aggregate
7+
from . import plackettluce as pl
8+
from . import stats
99

1010

1111
class EMMMixPLResult:
@@ -107,11 +107,12 @@ def aggregate(self, rankings, K, epsilon, max_iters, epsilon_mm, max_iters_mm):
107107
z_h1[i][k] = (pi_h[k] * EMMMixPLAggregator.f(x[i], p_h[k])) / denom_sum
108108

109109
# M-Step:
110-
#for l in range(max_iters_mm):
111-
for l in range(int(g/50) + 5):
110+
for l in range(max_iters_mm):
111+
#for l in range(int(g/50) + 5):
112112
for k in range(K):
113113
normconst = 0
114-
pi_h1[k] = np.sum(z_h1.T[k]) / len(z_h1)
114+
if l == 0: # only need to compute pi at first MM iteration
115+
pi_h1[k] = np.sum(z_h1.T[k]) / len(z_h1)
115116
for j in range(self.m):
116117
omega_k_j = EMMMixPLAggregator.omega(k, j, z_h1, x) # numerator
117118
denom_sum = 0
@@ -132,15 +133,18 @@ def aggregate(self, rankings, K, epsilon, max_iters, epsilon_mm, max_iters_mm):
132133
p_h1[k][j] /= normconst
133134

134135
if (epsilon_mm != None and
135-
np.all(np.absolute(p_h1 - p_h) < epsilon_mm) and
136-
np.all(np.absolute(pi_h1 - pi_h) < epsilon_mm)):
136+
np.all(np.absolute(p_h1 - p_h) < epsilon_mm)):
137137
break
138138

139+
p_h = np.copy(p_h1) # deep copy p for next MM iteration
140+
# pi does not change across MM iterations, no copy needed
141+
139142
if (epsilon != None and
140143
np.all(np.absolute(p_h1 - p_h) < epsilon) and
141144
np.all(np.absolute(pi_h1 - pi_h) < epsilon)):
142145
break
143146

147+
# remember that assignments below are references only, not copies
144148
p_h = p_h1
145149
pi_h = pi_h1
146150

prefpy/evbwie1.py

Lines changed: 184 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,184 @@
1+
# Implementation of algorithm (2) from
2+
# Exploring Voting Blocs Within the Irish Electorate:
3+
# A Mixture Modeling Approach by Gormley and Murphy, 2008
4+
5+
import numpy as np
6+
from . import aggregate
7+
from . import plackettluce as pl
8+
from . import stats
9+
10+
11+
class EMMMixPLResult:
12+
def __init__(self, num_alts, num_votes, num_mix, true_params, epsilon, max_iters, epsilon_mm, max_iters_mm, init_guess, soln_params, runtime):
13+
self.num_alts = num_alts
14+
self.num_votes = num_votes
15+
self.num_mix = num_mix
16+
self.true_params = true_params
17+
self.epsilon = epsilon
18+
self.max_iters = max_iters
19+
self.epsilon_mm = epsilon_mm
20+
self.max_iters_mm = max_iters_mm
21+
self.init_guess = init_guess
22+
self.soln_params = soln_params
23+
self.runtime = runtime
24+
25+
class EMMMixPLAggregator(aggregate.RankAggregator):
26+
27+
def c(x_i, j):
28+
try:
29+
return x_i[j]
30+
except IndexError:
31+
return -1
32+
33+
def f(x_i, p):
34+
prod = 1
35+
for t in range(len(x_i)):
36+
denom_sum = 0
37+
for s in range(t, len(p)):
38+
denom_sum += p[EMMMixPLAggregator.c(x_i, s)]
39+
prod *= p[EMMMixPLAggregator.c(x_i, t)] / denom_sum
40+
return prod
41+
42+
def indic(j, x_i, s):
43+
flag = j == EMMMixPLAggregator.c(x_i, s)
44+
if flag:
45+
return 1
46+
else:
47+
return 0
48+
49+
def delta(x_i, j, s, N):
50+
""" delta_i_j_s """
51+
flag = j == EMMMixPLAggregator.c(x_i, s)
52+
if flag and s < len(x_i):
53+
return 1
54+
elif s == N:
55+
found_equal = False
56+
for l in range(len(x_i)):
57+
if j == EMMMixPLAggregator.c(x_i, l):
58+
found_equal = True
59+
break
60+
if not found_equal:
61+
return 1
62+
return 0
63+
64+
def omega(k, j, z, x):
65+
""" omega_k_j """
66+
sum_out = 0
67+
for i in range(len(x)):
68+
sum_in = 0
69+
for t in range(len(x[i])):
70+
sum_in += z[i][k] * EMMMixPLAggregator.indic(j, x[i], t)
71+
sum_out += sum_in
72+
return sum_out
73+
74+
def aggregate(self, rankings, K, epsilon, tot_iters, epsilon_mm, max_iters_em):
75+
x = rankings # shorter pseudonym for voting data
76+
self.n = len(rankings) # number of votes
77+
78+
# pre-compute the delta values
79+
delta_i_j_s = np.empty((self.n, self.m, self.m + 1))
80+
for i in range(self.n):
81+
for j in range(self.m):
82+
for s in range(self.m + 1):
83+
delta_i_j_s[i][j][s] = EMMMixPLAggregator.delta(x[i], j, s, self.m)
84+
85+
# generate initial values for p and pi:
86+
p_h0 = np.random.rand(K, self.m)
87+
p_h0 /= np.sum(p_h0, axis=1, keepdims=True)
88+
89+
pi_h0 = np.random.rand(K)
90+
pi_h0 /= np.sum(pi_h0)
91+
92+
p_h = np.copy(p_h0)
93+
pi_h = np.copy(pi_h0)
94+
95+
inner = tot_iters // max_iters_em
96+
97+
for g in range(max_iters_em):
98+
#for g in range(max_iters):
99+
100+
p_h1 = np.empty((K, self.m))
101+
pi_h1 = np.empty(K)
102+
z_h1 = np.empty((self.n, K))
103+
104+
# E-Step:
105+
for i in range(self.n):
106+
for k in range(K):
107+
denom_sum = 0
108+
for k2 in range(K):
109+
denom_sum += pi_h[k2] * EMMMixPLAggregator.f(x[i], p_h[k2])
110+
z_h1[i][k] = (pi_h[k] * EMMMixPLAggregator.f(x[i], p_h[k])) / denom_sum
111+
112+
# M-Step:
113+
test = (g + 1) * inner + (max_iters_em - g - 1) * (inner + 1)
114+
if test < tot_iters:
115+
inner += 1
116+
for l in range(inner):
117+
#for l in range(max_iters_mm):
118+
#for l in range(int(g/50) + 5):
119+
for k in range(K):
120+
normconst = 0
121+
if l == 0: # only need to compute pi at first MM iteration
122+
pi_h1[k] = np.sum(z_h1.T[k]) / len(z_h1)
123+
for j in range(self.m):
124+
omega_k_j = EMMMixPLAggregator.omega(k, j, z_h1, x) # numerator
125+
denom_sum = 0
126+
for i in range(self.n):
127+
sum1 = 0
128+
for t in range(len(x[i])):
129+
sum2 = 0
130+
sum3 = 0
131+
for s in range(t, self.m):
132+
sum2 += p_h[k][EMMMixPLAggregator.c(x[i], s)]
133+
for s in range(t, self.m + 1):
134+
sum3 += delta_i_j_s[i][j][s]
135+
sum1 += z_h1[i][k] * (sum2 ** -1) * sum3
136+
denom_sum += sum1
137+
p_h1[k][j] = omega_k_j / denom_sum
138+
normconst += p_h1[k][j]
139+
for j in range(self.m):
140+
p_h1[k][j] /= normconst
141+
142+
if (epsilon_mm != None and
143+
np.all(np.absolute(p_h1 - p_h) < epsilon_mm)):
144+
break
145+
146+
p_h = np.copy(p_h1) # deep copy p for next MM iteration
147+
# pi does not change across MM iterations, no copy needed
148+
149+
if (epsilon != None and
150+
np.all(np.absolute(p_h1 - p_h) < epsilon) and
151+
np.all(np.absolute(pi_h1 - pi_h) < epsilon)):
152+
break
153+
154+
# remember that assignments below are references only, not copies
155+
p_h = p_h1
156+
pi_h = pi_h1
157+
158+
return (pi_h1, p_h1, pi_h0, p_h0)
159+
160+
def main():
161+
n = 100
162+
m = 4
163+
k = 2
164+
cand_set = np.arange(m)
165+
#np.random.seed(0)
166+
params, votes = pl.generate_mix2pl_dataset(n, m, useDirichlet=True)
167+
print("Ground-Truth Parameters:\n" + str(params))
168+
print("EMM Algorithm:")
169+
170+
emmagg = EMMMixPLAggregator(cand_set)
171+
pi, p = emmagg.aggregate(votes, K=2, epsilon=1e-8, max_iters=1000, epsilon_mm=1e-8, max_iters_mm=10)
172+
173+
sol_params = np.empty(2*m+1)
174+
sol_params[0] = pi[0]
175+
sol_params[1:m+1] = p[0]
176+
sol_params[m+1:] = p[1]
177+
178+
print("Ground-Truth Parameters:\n" + str(params))
179+
print("Final Solution:\n" + str(sol_params))
180+
print("\t\"1 - alpha\" = " + str(pi[1]))
181+
print("WSSE:\n" + str(stats.mix2PL_wsse(params, sol_params, m)))
182+
183+
if __name__ == "__main__":
184+
main()

0 commit comments

Comments
 (0)