Skip to content

Commit 1487db0

Browse files
committed
Add implementations of GMM and EMM for Mix-PL
- Add generalized method of moments (GMM) algorithm proposed in “Learning Mixtures of Plackett-Luce (PL) Models” by Zhao, Piech, & Xia (2016) - Add Expectation Minorization Maximization (EMM) algorithm for mixtures of PL from Gormley & Murphy (2008) - Add various programs for running experiments for testing the fitness of the GMM and EMM algorithms added in this commit - Add functions for generation of ranking datasets of mixture of PL models - Add program of experiments comparing the performance of the GMM and MM algorithm implementations for learning single models of PL - Add new functions that assist in computing statistical error measures in learned parameters for mixture models of PL - Update the readme
1 parent f4ddc5c commit 1487db0

21 files changed

Lines changed: 2188 additions & 56 deletions

.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# ignore compilation files
2+
__pycache__/
3+
*.py[cod]
4+
*$py.class

README.md

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,19 @@ prefpy
44
Rank aggregation algorithms in the computer science field of computational social choice
55

66

7+
What's New
8+
==========
9+
10+
- Generalized method of moments algorithm for mixture of Plackett-Luce models
11+
- Implementation of Mixture Model for Plackett-Luce EMM algorithm by Gormley & Murphy for the "no dampening" case (i.e. the dampening parameters are all fixed at 1)
12+
13+
714
Work In Progress
815
================
916

10-
- This is the very initial version in a Python package form, further structural changes will be coming
17+
- This is an initial version in Python package form; further structural changes will be coming
1118
- Module naming conventions will be changed; currently the algorithm files take the initials of the names of the papers from which they originate (e.g. "gmmra" for Generalized Method of Moments for Rank Aggregation)
1219
- Mixture Model for Plackett-Luce EMM algorithm by Gormley & Murphy is forthcoming pending verification and testing of the method
13-
- Generalized method of moments algorithm for Plackett-Luce
1420
- Random utility model algorithms (verification of the implementation needs to be completed)
1521

1622

prefpy/evbwie.py

Lines changed: 173 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,173 @@
1+
# Implementation of algorithm (2) from
2+
# Exploring Voting Blocs Within the Irish Electorate:
3+
# A Mixture Modeling Approach by Gormley and Murphy, 2008
4+
5+
import numpy as np
6+
import aggregate
7+
import plackettluce as pl
8+
import stats
9+
10+
11+
class EMMMixPLResult:
    """Plain record describing one run of the EMM mixture-of-PL algorithm.

    The constructor performs no validation or conversion; every argument is
    stored unchanged on the instance under the attribute of the same name.
    """

    def __init__(self, num_alts, num_votes, num_mix, true_params, epsilon, max_iters, epsilon_mm, max_iters_mm, init_guess, soln_params, runtime):
        # Size of the problem instance.
        self.num_alts, self.num_votes, self.num_mix = num_alts, num_votes, num_mix
        # Reference parameters the solution can be compared against.
        self.true_params = true_params
        # Stopping settings for the outer loop, then for the inner (MM) loop.
        self.epsilon, self.max_iters = epsilon, max_iters
        self.epsilon_mm, self.max_iters_mm = epsilon_mm, max_iters_mm
        # Starting point, final estimate, and the recorded runtime.
        self.init_guess, self.soln_params = init_guess, soln_params
        self.runtime = runtime
24+
25+
class EMMMixPLAggregator(aggregate.RankAggregator):
    """EM/MM estimator for a K-component mixture of Plackett-Luce models.

    Follows the E-step / MM-update scheme of Gormley & Murphy (2008) with all
    dampening parameters fixed at 1.  Alternatives are addressed by integer
    index (they are used directly as array indices), and a ranking may be
    partial: alternatives that do not appear in it are treated as unranked.

    NOTE(review): ``self.m`` (number of alternatives) is read but never set
    in this class; presumably ``aggregate.RankAggregator.__init__`` provides
    it -- confirm against the base class.

    The helpers ``c``, ``indic``, ``delta`` and ``omega`` are kept as public
    static methods for backward compatibility; ``aggregate`` computes the
    same quantities with array operations instead of calling them.
    """

    @staticmethod
    def c(x_i, j):
        """Return the alternative at position ``j`` of ranking ``x_i``.

        Returns -1 when ``x_i`` has no position ``j`` (an unranked position).
        The -1 is a sentinel and must never be used as an array index.
        """
        try:
            return x_i[j]
        except IndexError:
            return -1

    @staticmethod
    def f(x_i, p):
        """Return the Plackett-Luce probability of ranking ``x_i`` under ``p``.

        ``p`` holds one weight per alternative.  At each ranked position the
        chosen alternative's weight is divided by the total weight of every
        alternative not chosen at an earlier position, so alternatives left
        out of a partial ranking stay in every denominator.  (The previous
        code indexed ``p[-1]`` for unranked positions, which counted the last
        alternative repeatedly instead.)  An empty ranking yields 1.
        """
        p = np.asarray(p, dtype=float)
        available = np.ones(len(p), dtype=bool)
        prod = 1
        for alt in x_i:
            prod *= p[alt] / p[available].sum()
            available[alt] = False
        return prod

    @staticmethod
    def indic(j, x_i, s):
        """Return 1 if alternative ``j`` sits at position ``s`` of ``x_i``, else 0."""
        return 1 if j == EMMMixPLAggregator.c(x_i, s) else 0

    @staticmethod
    def delta(x_i, j, s, N):
        """ delta_i_j_s

        Returns 1 when ``j`` is ranked at position ``s`` of ``x_i``, or when
        ``s == N`` and ``j`` does not appear in ``x_i`` at all (slot ``N``
        stands for "unranked"); returns 0 otherwise.
        """
        if s < len(x_i) and j == x_i[s]:
            return 1
        if s == N and not any(j == alt for alt in x_i):
            return 1
        return 0

    @staticmethod
    def omega(k, j, z, x):
        """ omega_k_j

        Sum of the membership weights ``z[i][k]`` over every (vote, position)
        pair at which alternative ``j`` is ranked.
        """
        return sum(z[i][k] for i, x_i in enumerate(x) for alt in x_i if alt == j)

    def aggregate(self, rankings, K, epsilon, max_iters, epsilon_mm, max_iters_mm):
        """Fit a K-component Plackett-Luce mixture to ``rankings``.

        Parameters:
            rankings: sequence of rankings; each ranking lists alternative
                indices from most to least preferred and may be partial.
            K: number of mixture components.
            epsilon: stop once every entry of both the component weights and
                the mixing proportions changes by less than this between
                consecutive iterations; ``None`` disables the check.
            max_iters: maximum number of EM iterations.
            epsilon_mm, max_iters_mm: accepted for interface compatibility
                and currently unused.  The previous inner loop never advanced
                its iterate, so it recomputed one identical MM update several
                times; a single update per EM iteration gives the same result.

        Returns:
            ``(pi, p, pi_init, p_init)``: the final mixing proportions
            (length K), the final component weights (K x m, rows summing to
            1), and the random initial values of both.

        Raises:
            ValueError: if a ranking has more positions than alternatives.

        Side effect: sets ``self.n`` to the number of rankings.
        """
        x = rankings  # shorter pseudonym for voting data
        self.n = len(rankings)  # number of votes
        n, m = self.n, self.m

        # avail[i, j, t] is 1 while alternative j has not been chosen before
        # position t of vote i (the suffix sum of delta_i_j_s over s >= t).
        # counts[i, j] is how many times j is ranked in vote i, and
        # ranked_pos[i, t] marks the positions vote i actually fills.
        avail = np.ones((n, m, m))
        counts = np.zeros((n, m))
        ranked_pos = np.zeros((n, m), dtype=bool)
        for i, x_i in enumerate(x):
            if len(x_i) > m:
                raise ValueError(
                    "ranking %d has %d positions but there are only %d alternatives"
                    % (i, len(x_i), m))
            for t, alt in enumerate(x_i):
                avail[i, alt, t + 1:] = 0.0
                counts[i, alt] += 1
                ranked_pos[i, t] = True

        # generate initial values for p and pi (same draw order as before so
        # seeded runs start from the same point):
        p_h0 = np.random.rand(K, m)
        p_h0 /= np.sum(p_h0, axis=1, keepdims=True)

        pi_h0 = np.random.rand(K)
        pi_h0 /= np.sum(pi_h0)

        p_h = np.copy(p_h0)
        pi_h = np.copy(pi_h0)

        # Defined up front so the return is valid even when max_iters <= 0.
        p_h1, pi_h1 = p_h, pi_h

        for _ in range(max_iters):

            # E-Step: posterior membership of each vote in each component.
            lik = np.array(
                [[EMMMixPLAggregator.f(x_i, p_h[k]) for k in range(K)] for x_i in x],
                dtype=float).reshape(n, K)
            z_h1 = pi_h * lik
            z_h1 /= np.sum(z_h1, axis=1, keepdims=True)

            # M-Step: closed-form update for pi, one MM update for p.
            pi_h1 = np.sum(z_h1, axis=0) / n

            # omega[k, j]: membership-weighted count of j being ranked.
            omega = np.dot(z_h1.T, counts)

            # remaining[i, k, t]: total weight, under component k, of the
            # alternatives still available at position t of vote i.
            remaining = np.einsum('kj,ijt->ikt', p_h, avail)
            inv_remaining = np.zeros_like(remaining)
            # Only positions a vote actually fills contribute to the update.
            mask = np.broadcast_to(ranked_pos[:, None, :], remaining.shape)
            inv_remaining[mask] = 1.0 / remaining[mask]

            denom = np.einsum('ik,ikt,ijt->kj', z_h1, inv_remaining, avail)
            p_h1 = omega / denom
            p_h1 /= np.sum(p_h1, axis=1, keepdims=True)

            if (epsilon is not None and
                    np.all(np.absolute(p_h1 - p_h) < epsilon) and
                    np.all(np.absolute(pi_h1 - pi_h) < epsilon)):
                break

            p_h = p_h1
            pi_h = pi_h1

        return (pi_h1, p_h1, pi_h0, p_h0)
148+
149+
def main():
    """Demo: fit a 2-component mixture of Plackett-Luce models to random data.

    Draws a dataset with ``pl.generate_mix2pl_dataset``, runs the EMM
    aggregator on it, and prints the ground-truth parameters, the estimate
    and the value returned by ``stats.mix2PL_wsse``.
    """
    n = 100
    m = 4
    k = 2
    cand_set = np.arange(m)
    # Uncomment for reproducible runs:
    #np.random.seed(0)
    params, votes = pl.generate_mix2pl_dataset(n, m, useDirichlet=True)
    print("Ground-Truth Parameters:\n" + str(params))
    print("EMM Algorithm:")

    emmagg = EMMMixPLAggregator(cand_set)
    # aggregate() returns (pi, p, pi_init, p_init); unpacking it into only two
    # names raised ValueError, so the initial guesses are bound and ignored.
    pi, p, _pi_init, _p_init = emmagg.aggregate(votes, K=k, epsilon=1e-8, max_iters=1000, epsilon_mm=1e-8, max_iters_mm=10)

    # Flatten the estimate as [pi_0, p_0 (m values), p_1 (m values)].
    # NOTE(review): presumably this is the layout mix2PL_wsse expects for
    # both parameter vectors -- confirm against the stats module.
    sol_params = np.empty(2*m+1)
    sol_params[0] = pi[0]
    sol_params[1:m+1] = p[0]
    sol_params[m+1:] = p[1]

    print("Ground-Truth Parameters:\n" + str(params))
    print("Final Solution:\n" + str(sol_params))
    print("\t\"1 - alpha\" = " + str(pi[1]))
    print("WSSE:\n" + str(stats.mix2PL_wsse(params, sol_params, m)))


if __name__ == "__main__":
    main()

prefpy/gen_mixpl_datasets.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
import sys
2+
import numpy as np
3+
import plackettluce as pl
4+
5+
def print_usage(argv0):
    """Print the command-line usage message and terminate the program.

    ``argv0`` is the script name shown in the message.  Always raises
    ``SystemExit`` with status 2: this is only called for a malformed command
    line, and the previous bare ``sys.exit()`` reported success (status 0) to
    the calling shell.
    """
    print("USAGE: python3 " + argv0 + " <num alts> <num votes> <num datasets> <dataset(s) base filename.csv>")
    sys.exit(2)
8+
9+
def main(argv):
    """Write a series of mixture-of-PL datasets to numbered CSV files.

    ``argv`` is the full command line:
    ``[script, num_alts, num_votes, num_datasets, base_filename]``.
    Dataset ``i`` goes to ``<base_filename>_<i>.csv``, with ``i`` zero-padded
    to as many characters as the ``num_datasets`` argument was typed with.
    Any other argument count prints the usage message and exits.
    """
    if len(argv) != 5:
        print_usage(argv[0])
    m = int(argv[1])
    n = int(argv[2])
    len_d = str(len(argv[3]))
    d = int(argv[3])
    filename_base = argv[4]

    print("i = ", end='')
    # Width of the counter currently on screen.  Nothing has been printed
    # before the first iteration, so nothing must be erased then; the old
    # code backspaced over len(str(-1)) == 2 characters of the "i = " prefix.
    shown_width = 0
    for i in range(d):
        print("\b" * shown_width + str(i), end='')
        shown_width = len(str(i))
        sys.stdout.flush()
        outfilename = filename_base + '_' + ("{0:0" + len_d + "d}").format(i) + ".csv"
        # The context manager closes the file even if generation raises.
        # NOTE(review): the trailing True presumably selects Dirichlet
        # sampling, as useDirichlet does in the public generator -- confirm.
        with open(outfilename, 'w') as outfile:
            pl._generate_mix2pl_dataset(n, m, outfile, True)


if __name__ == "__main__":
    main(sys.argv)

0 commit comments

Comments
 (0)