|
| 1 | +# Implementation of algorithm (2) from |
| 2 | +# Exploring Voting Blocs Within the Irish Electorate: |
| 3 | +# A Mixture Modeling Approach by Gormley and Murphy, 2008 |
| 4 | + |
| 5 | +import numpy as np |
| 6 | +from . import aggregate |
| 7 | +from . import plackettluce as pl |
| 8 | +from . import stats |
| 9 | + |
| 10 | + |
| 11 | +class EMMMixPLResult: |
| 12 | + def __init__(self, num_alts, num_votes, num_mix, true_params, epsilon, max_iters, epsilon_mm, max_iters_mm, init_guess, soln_params, runtime): |
| 13 | + self.num_alts = num_alts |
| 14 | + self.num_votes = num_votes |
| 15 | + self.num_mix = num_mix |
| 16 | + self.true_params = true_params |
| 17 | + self.epsilon = epsilon |
| 18 | + self.max_iters = max_iters |
| 19 | + self.epsilon_mm = epsilon_mm |
| 20 | + self.max_iters_mm = max_iters_mm |
| 21 | + self.init_guess = init_guess |
| 22 | + self.soln_params = soln_params |
| 23 | + self.runtime = runtime |
| 24 | + |
| 25 | +class EMMMixPLAggregator(aggregate.RankAggregator): |
| 26 | + |
| 27 | + def c(x_i, j): |
| 28 | + try: |
| 29 | + return x_i[j] |
| 30 | + except IndexError: |
| 31 | + return -1 |
| 32 | + |
| 33 | + def f(x_i, p): |
| 34 | + prod = 1 |
| 35 | + for t in range(len(x_i)): |
| 36 | + denom_sum = 0 |
| 37 | + for s in range(t, len(p)): |
| 38 | + denom_sum += p[EMMMixPLAggregator.c(x_i, s)] |
| 39 | + prod *= p[EMMMixPLAggregator.c(x_i, t)] / denom_sum |
| 40 | + return prod |
| 41 | + |
| 42 | + def indic(j, x_i, s): |
| 43 | + flag = j == EMMMixPLAggregator.c(x_i, s) |
| 44 | + if flag: |
| 45 | + return 1 |
| 46 | + else: |
| 47 | + return 0 |
| 48 | + |
| 49 | + def delta(x_i, j, s, N): |
| 50 | + """ delta_i_j_s """ |
| 51 | + flag = j == EMMMixPLAggregator.c(x_i, s) |
| 52 | + if flag and s < len(x_i): |
| 53 | + return 1 |
| 54 | + elif s == N: |
| 55 | + found_equal = False |
| 56 | + for l in range(len(x_i)): |
| 57 | + if j == EMMMixPLAggregator.c(x_i, l): |
| 58 | + found_equal = True |
| 59 | + break |
| 60 | + if not found_equal: |
| 61 | + return 1 |
| 62 | + return 0 |
| 63 | + |
| 64 | + def omega(k, j, z, x): |
| 65 | + """ omega_k_j """ |
| 66 | + sum_out = 0 |
| 67 | + for i in range(len(x)): |
| 68 | + sum_in = 0 |
| 69 | + for t in range(len(x[i])): |
| 70 | + sum_in += z[i][k] * EMMMixPLAggregator.indic(j, x[i], t) |
| 71 | + sum_out += sum_in |
| 72 | + return sum_out |
| 73 | + |
| 74 | + def aggregate(self, rankings, K, epsilon, tot_iters, epsilon_mm, max_iters_em): |
| 75 | + x = rankings # shorter pseudonym for voting data |
| 76 | + self.n = len(rankings) # number of votes |
| 77 | + |
| 78 | + # pre-compute the delta values |
| 79 | + delta_i_j_s = np.empty((self.n, self.m, self.m + 1)) |
| 80 | + for i in range(self.n): |
| 81 | + for j in range(self.m): |
| 82 | + for s in range(self.m + 1): |
| 83 | + delta_i_j_s[i][j][s] = EMMMixPLAggregator.delta(x[i], j, s, self.m) |
| 84 | + |
| 85 | + # generate initial values for p and pi: |
| 86 | + p_h0 = np.random.rand(K, self.m) |
| 87 | + p_h0 /= np.sum(p_h0, axis=1, keepdims=True) |
| 88 | + |
| 89 | + pi_h0 = np.random.rand(K) |
| 90 | + pi_h0 /= np.sum(pi_h0) |
| 91 | + |
| 92 | + p_h = np.copy(p_h0) |
| 93 | + pi_h = np.copy(pi_h0) |
| 94 | + |
| 95 | + inner = tot_iters // max_iters_em |
| 96 | + |
| 97 | + for g in range(max_iters_em): |
| 98 | + #for g in range(max_iters): |
| 99 | + |
| 100 | + p_h1 = np.empty((K, self.m)) |
| 101 | + pi_h1 = np.empty(K) |
| 102 | + z_h1 = np.empty((self.n, K)) |
| 103 | + |
| 104 | + # E-Step: |
| 105 | + for i in range(self.n): |
| 106 | + for k in range(K): |
| 107 | + denom_sum = 0 |
| 108 | + for k2 in range(K): |
| 109 | + denom_sum += pi_h[k2] * EMMMixPLAggregator.f(x[i], p_h[k2]) |
| 110 | + z_h1[i][k] = (pi_h[k] * EMMMixPLAggregator.f(x[i], p_h[k])) / denom_sum |
| 111 | + |
| 112 | + # M-Step: |
| 113 | + test = (g + 1) * inner + (max_iters_em - g - 1) * (inner + 1) |
| 114 | + if test < tot_iters: |
| 115 | + inner += 1 |
| 116 | + for l in range(inner): |
| 117 | + #for l in range(max_iters_mm): |
| 118 | + #for l in range(int(g/50) + 5): |
| 119 | + for k in range(K): |
| 120 | + normconst = 0 |
| 121 | + if l == 0: # only need to compute pi at first MM iteration |
| 122 | + pi_h1[k] = np.sum(z_h1.T[k]) / len(z_h1) |
| 123 | + for j in range(self.m): |
| 124 | + omega_k_j = EMMMixPLAggregator.omega(k, j, z_h1, x) # numerator |
| 125 | + denom_sum = 0 |
| 126 | + for i in range(self.n): |
| 127 | + sum1 = 0 |
| 128 | + for t in range(len(x[i])): |
| 129 | + sum2 = 0 |
| 130 | + sum3 = 0 |
| 131 | + for s in range(t, self.m): |
| 132 | + sum2 += p_h[k][EMMMixPLAggregator.c(x[i], s)] |
| 133 | + for s in range(t, self.m + 1): |
| 134 | + sum3 += delta_i_j_s[i][j][s] |
| 135 | + sum1 += z_h1[i][k] * (sum2 ** -1) * sum3 |
| 136 | + denom_sum += sum1 |
| 137 | + p_h1[k][j] = omega_k_j / denom_sum |
| 138 | + normconst += p_h1[k][j] |
| 139 | + for j in range(self.m): |
| 140 | + p_h1[k][j] /= normconst |
| 141 | + |
| 142 | + if (epsilon_mm != None and |
| 143 | + np.all(np.absolute(p_h1 - p_h) < epsilon_mm)): |
| 144 | + break |
| 145 | + |
| 146 | + p_h = np.copy(p_h1) # deep copy p for next MM iteration |
| 147 | + # pi does not change across MM iterations, no copy needed |
| 148 | + |
| 149 | + if (epsilon != None and |
| 150 | + np.all(np.absolute(p_h1 - p_h) < epsilon) and |
| 151 | + np.all(np.absolute(pi_h1 - pi_h) < epsilon)): |
| 152 | + break |
| 153 | + |
| 154 | + # remember that assignments below are references only, not copies |
| 155 | + p_h = p_h1 |
| 156 | + pi_h = pi_h1 |
| 157 | + |
| 158 | + return (pi_h1, p_h1, pi_h0, p_h0) |
| 159 | + |
| 160 | +def main(): |
| 161 | + n = 100 |
| 162 | + m = 4 |
| 163 | + k = 2 |
| 164 | + cand_set = np.arange(m) |
| 165 | + #np.random.seed(0) |
| 166 | + params, votes = pl.generate_mix2pl_dataset(n, m, useDirichlet=True) |
| 167 | + print("Ground-Truth Parameters:\n" + str(params)) |
| 168 | + print("EMM Algorithm:") |
| 169 | + |
| 170 | + emmagg = EMMMixPLAggregator(cand_set) |
| 171 | + pi, p = emmagg.aggregate(votes, K=2, epsilon=1e-8, max_iters=1000, epsilon_mm=1e-8, max_iters_mm=10) |
| 172 | + |
| 173 | + sol_params = np.empty(2*m+1) |
| 174 | + sol_params[0] = pi[0] |
| 175 | + sol_params[1:m+1] = p[0] |
| 176 | + sol_params[m+1:] = p[1] |
| 177 | + |
| 178 | + print("Ground-Truth Parameters:\n" + str(params)) |
| 179 | + print("Final Solution:\n" + str(sol_params)) |
| 180 | + print("\t\"1 - alpha\" = " + str(pi[1])) |
| 181 | + print("WSSE:\n" + str(stats.mix2PL_wsse(params, sol_params, m))) |
| 182 | + |
| 183 | +if __name__ == "__main__": |
| 184 | + main() |
0 commit comments