Skip to content

Commit 239c95e

Browse files
committed
batch query for authorities
1 parent 8abfae6 commit 239c95e

1 file changed

Lines changed: 11 additions & 26 deletions

File tree

import-scripts/importUsersClickhouse.py

Lines changed: 11 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -271,26 +271,6 @@ def get_current_user_map(ch_client):
271271

272272
return to_return
273273

274-
# ------------------------------------------------------------------------------
275-
# get current user authorities
276-
277-
def get_user_authorities(ch_client, google_email):
278-
279-
# list of authorities (cancer studies) we are returning -- as a list
280-
to_return = []
281-
282-
# recall each tuple in authorities table is ['EMAIL', 'AUTHORITY']
283-
# no tuple can contain nulls
284-
try:
285-
result = ch_client.query('SELECT * FROM authorities WHERE email = {email:String}',
286-
parameters={'email': google_email})
287-
for row in result.result_rows:
288-
to_return.append(row[1])
289-
except Exception as msg:
290-
print(msg, file=ERROR_FILE)
291-
return None
292-
293-
return to_return
294274

295275
# ------------------------------------------------------------------------------
296276
# get current users from google spreadsheet
@@ -369,7 +349,6 @@ def get_rejected_user_map(spreadsheet, sheet_records, current_user_map, portal_n
369349
else:
370350
to_return[google_email.lower()] = User(inst_email, google_email, name, 0,
371351
[portal_name + ':' + au for au in authorities.split(';')])
372-
print('Rejected user added to list: %s' % google_email.lower(), file=OUTPUT_FILE)
373352
return to_return
374353

375354
# ------------------------------------------------------------------------------
@@ -477,15 +456,21 @@ def update_user_authorities(spreadsheet, ch_client, sheet_records, portal_name):
477456
if all_user_map is None:
478457
return None
479458
total = len(all_user_map)
480-
print('Updating authorities for %d user(s) in current portal user list' % total, file=OUTPUT_FILE)
459+
print('Fetching existing authorities for %d user(s) in batch' % total, file=OUTPUT_FILE)
460+
emails = list(all_user_map.keys())
461+
result = ch_client.query('SELECT email, authority FROM authorities WHERE email IN {emails:Array(String)}',
462+
parameters={'emails': emails})
463+
db_authorities_map = {}
464+
for row in result.result_rows:
465+
db_authorities_map.setdefault(row[0].lower(), set()).add(row[1])
466+
481467
new_authority_pairs = []
482-
for i, user in enumerate(all_user_map.values(), 1):
483-
print(' [%d/%d] checking authorities for %s' % (i, total, user.google_email), file=OUTPUT_FILE)
468+
for user in all_user_map.values():
484469
sheet_authorities = set(user.authorities)
485-
db_authorities = set(get_user_authorities(ch_client, user.google_email))
470+
db_authorities = db_authorities_map.get(user.google_email, set())
486471
added = [(user.google_email, authority) for authority in sheet_authorities - db_authorities]
487472
if added:
488-
print(' -> adding %d new authority(s): %s' % (len(added), [a for _, a in added]), file=OUTPUT_FILE)
473+
print(' %s: adding %d new authority(s)' % (user.google_email, len(added)), file=OUTPUT_FILE)
489474
new_authority_pairs += added
490475
if new_authority_pairs:
491476
print('Inserting %d new authority pair(s) into ClickHouse' % len(new_authority_pairs), file=OUTPUT_FILE)

0 commit comments

Comments
 (0)