-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathgenerate_prolific_matchings.py
More file actions
128 lines (105 loc) · 6.11 KB
/
generate_prolific_matchings.py
File metadata and controls
128 lines (105 loc) · 6.11 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
import numpy as np
import random
from tqdm import tqdm
import os
import os.path as osp
import time
import pickle as pkl
import pandas as pd
from MWBM.data_generation import Pool, create_biased_probs
from MWBM.mwbm_algs import BMWBCM, MWBCM
# ---------------------------------------------------------------------------
# Experiment configuration: output folders, RNG seeds, pool dimensions, and
# the ground-truth probability tables used to generate the matching pools.
# ---------------------------------------------------------------------------

# Root output folder. exist_ok avoids the check-then-create race of the
# `if not exists: makedirs` pattern (safe if another process creates it first).
os.makedirs('data', exist_ok=True)
# Each run writes into a unique timestamped subfolder; here makedirs is left
# strict on purpose so two runs starting in the same second fail loudly
# instead of silently sharing a folder.
timestamp = time.strftime("%Y%m%d-%H%M%S")
CONFIG_DIR = osp.join('data', timestamp)
os.makedirs(CONFIG_DIR)

# Fix both RNGs so pool generation is reproducible across runs.
random.seed(0)
np.random.seed(0)

N_POOLS = 40  # number of independent pool realizations #* Modify as needed
N = 20        # people per pool                         #* Modify as needed
M = 10        # locations (half-day time slots) per pool
capacities_types = np.array([2]) #* Modify as needed

# One key per location: weekday x {am, pm} half-day slots (len == M).
days_keys = ['Mo-am', 'Mo-pm', 'Tu-am', 'Tu-pm', 'We-am', 'We-pm', 'Th-am', 'Th-pm', 'Fr-am', 'Fr-pm']

# Per (person-type, location) parameter pairs consumed by Pool /
# create_biased_probs; exact semantics are defined in MWBM.data_generation.
prob_y_per_x = [
    [[.2,.3], [20,20], [.2,.3], [20,17], [17, 20], [20,20], [.2,.4], [19, 12], [.2,.3], [.15,.2]], #probs of x1
    [[20,20], [.2,.4], [19,12], [.2,.3], [.15,.2], [.2,.3], [20,20], [.2, .3], [20,17], [17, 20]], #probs of x2
    [[ 1,10], [ 1,10] ,[ 5,10] ,[ 5, 2], [3.1, 4], [19,12], [.2,.3], [.15,.2], [.2,.3], [20, 20]] #probs of x3
]
prob_x = np.array([.2, .45, .35])  # marginal distribution over person types
prob_y_per_x = np.array(prob_y_per_x)
x_types = len(prob_x)
# Sanity check: one (a, b) parameter pair per type per location.
assert prob_y_per_x.shape == (x_types, M, 2), prob_y_per_x.shape

# Persist the configuration alongside the generated pools for provenance.
np.save(osp.join(CONFIG_DIR, 'prob_y_per_x.npy'), prob_y_per_x)
np.save(osp.join(CONFIG_DIR, 'capacities_types.npy'), capacities_types)
np.save(osp.join(CONFIG_DIR, 'prob_x.npy'), prob_x)
# Main generation loop: for each pool realization, compute the optimal
# matching under the true probabilities, then for every split point B
# (people left to the human) compute the algorithm's matching on the
# noisy probabilities and save all artifacts to disk.
progress = tqdm(range(N_POOLS), desc='Generating pools', position=0, leave=True)
for pool_idx in progress:
    # Draw one pool realization: people, locations, true probabilities g,
    # and per-location capacities.
    pool = Pool(N, M, prob_x, prob_y_per_x, capacities_types, type_prob='quantile',
                bias_loc=None, type_bias_loc=None, tau_loc=None, prob_eps=0.005)
    # Model's noisy probabilities - average mode created probabilities as explained in the paper.
    g_alg = create_biased_probs(pool.g, mode='average', prob_y_per_x=prob_y_per_x,
                                bias_locs=None, pool_feats=pool.people_feat)
    # Optimal matching under the perfect (true) probabilities.
    matching_real_prob = MWBCM(people=pool.people_idx, locations=pool.locations_idx,
                               capacities=pool.capacities, g=pool.g, verbose=False)
    matching_real_prob = matching_real_prob.solve_LP(return_solution=True)

    # Persist the pool realization; refuse to overwrite an existing folder.
    pool_dir = osp.join(CONFIG_DIR, f'pool{pool_idx}')
    if osp.exists(pool_dir):
        raise ValueError(f'Pool {pool_idx} already exists')
    os.makedirs(pool_dir)
    np.save(osp.join(pool_dir, 'people_idx.npy'), pool.people_idx)
    np.save(osp.join(pool_dir, 'people_feat.npy'), pool.people_feat)
    np.save(osp.join(pool_dir, 'locations_idx.npy'), pool.locations_idx)
    np.save(osp.join(pool_dir, 'capacities.npy'), pool.capacities)
    np.save(osp.join(pool_dir, 'g.npy'), pool.g)
    np.save(osp.join(pool_dir, 'g_alg.npy'), g_alg)
    # Use a context manager so the file handle is closed deterministically
    # (the original left the handle to the garbage collector).
    with open(osp.join(pool_dir, 'pool.pkl'), 'wb') as fh:
        pkl.dump(pool, fh)
    np.save(osp.join(pool_dir, 'matching_opt_real_prob.npy'), matching_real_prob)

    # B ranges over how many people are left unmatched for the human;
    # the algorithm matches the remaining num_matchings - B.
    num_matchings = min(pool.N, pool.C)
    B_progress = tqdm(range(num_matchings+1), desc='Biased matchings', position=1, leave=False)
    for B in B_progress:
        # Folder for this specific split point B.
        pool_b_dir = osp.join(pool_dir, f'B{B}')
        if osp.exists(pool_b_dir):
            # Fixed: the original message wrongly said the *pool* existed.
            raise ValueError(f'B{B} for pool {pool_idx} already exists')
        os.makedirs(pool_b_dir)
        # Re-load a fresh copy of the pool so each B starts from an
        # unmatched state (set_matching_pairs mutates it).
        with open(osp.join(pool_dir, 'pool.pkl'), 'rb') as fh:
            B_pool = pkl.load(fh)
        total_LP_matchings = num_matchings - B
        if total_LP_matchings > 0:
            # Algorithm's matching computed on the *noisy* probabilities g_alg,
            # budgeted to total_LP_matchings pairs.
            alg_matching_solver = BMWBCM(people=B_pool.people_idx, locations=B_pool.locations_idx,
                                         capacities=B_pool.capacities, g=g_alg, B=total_LP_matchings, verbose=False)
            alg_matching = alg_matching_solver.solve_LP(return_solution=True)
            B_pool.set_matching_pairs(alg_matching)
            # NOTE(review): despite astype(int), the default savetxt fmt writes
            # float notation; adding fmt='%i' (as for opt_rem_match below) would
            # change the on-disk format - confirm no downstream reader depends
            # on it before changing.
            np.savetxt(osp.join(pool_b_dir, 'alg_matching.csv'), alg_matching.astype(int), delimiter=",")
        if B_pool.remaining_capacities.sum() > 0 and B_pool.remaining_people.size > 0 and B > 0:
            # Optimal matching of the human's remaining people under the
            # *true* probabilities (split but perfect information).
            opt_human_solver = MWBCM(people=B_pool.remaining_people, locations=B_pool.locations_idx,
                                     capacities=B_pool.remaining_capacities, g=B_pool.g, verbose=False)
            opt_human_rem_match = opt_human_solver.solve_LP(return_solution=True)
            # Save as int
            np.savetxt(osp.join(pool_b_dir, 'opt_rem_match.csv'), opt_human_rem_match.astype(int), delimiter=",", fmt='%i')
        remaining_g = B_pool.g[B_pool.remaining_people]
        assert B_pool.remaining_people.shape[0] == B, f"Number of remaining people is {B_pool.remaining_people.shape[0]}, but should be {B}."
        # Fixed: message now reports remaining_g's row count, not the people count.
        assert remaining_g.shape[0] == B, f"remaining_g has {remaining_g.shape[0]} rows, but should be {B}."
        np.savetxt(osp.join(pool_b_dir, 'remaining_people.csv'), B_pool.remaining_people, delimiter=",", fmt='%i')
        np.savetxt(osp.join(pool_b_dir, 'remaining_capacities.csv'), B_pool.remaining_capacities, delimiter=",", fmt='%i')
        np.savetxt(osp.join(pool_b_dir, 'remaining_g.csv'), remaining_g, delimiter=",", fmt='%1.2f')
        # Remaining capacities as a labeled table: (Day, Remaining), one row
        # per half-day slot in days_keys.
        # Fixed: the message's two values were swapped in the original.
        assert len(days_keys) == B_pool.remaining_capacities.shape[0], f"Number of remaining capacities is {B_pool.remaining_capacities.shape[0]}, but should be {len(days_keys)}."
        df = pd.DataFrame(B_pool.remaining_capacities, columns=['Remaining'], index=days_keys)
        df.index.name = 'Day'
        df.to_csv(osp.join(pool_b_dir, 'rem_capacities_TAB.csv'))
        # Remaining true probabilities as a labeled table: rows indexed by
        # the remaining people, columns by the half-day slots.
        df = pd.DataFrame(remaining_g, columns=days_keys)
        df.index = B_pool.remaining_people
        df.index.name = 'Person'
        df.to_csv(osp.join(pool_b_dir, 'rem_probs_TAB.csv'))