diff --git a/import-scripts/import-portal-users.sh b/import-scripts/import-portal-users.sh old mode 100755 new mode 100644 index d8dcd8755..ff0342c91 --- a/import-scripts/import-portal-users.sh +++ b/import-scripts/import-portal-users.sh @@ -13,11 +13,6 @@ MY_FLOCK_FILEPATH="/data/portal-cron/cron-lock/import-portal-users.lock" SMTP_SERVER=`cat $MAIL_SMTP_SERVER` GENIE_BLUE_DATABASE_PROPERTIES_FILENAME="portal.properties.genie.blue" GENIE_GREEN_DATABASE_PROPERTIES_FILENAME="portal.properties.genie.green" - GENIE_BLUE_CLICKHOUSE_CLIENT_CONFIG_FILEPATH="/data/portal-cron/pipelines-credentials/clickhouse_client_genie_blue_config.yaml" - GENIE_GREEN_CLICKHOUSE_CLIENT_CONFIG_FILEPATH="/data/portal-cron/pipelines-credentials/clickhouse_client_genie_green_config.yaml" - CLICKHOUSE_COMMANDS_PENDING_DIRPATH="/data/portal-cron/tmp/import-users-genie/pending_clickhouse_commands" - CLICKHOUSE_COMMANDS_COMPLETED_DIRPATH="/data/portal-cron/tmp/import-users-genie/completed_clickhouse_commands" - CLICKHOUSE_BINARY_FILEPATH="/home/cbioportal_importer/tools/clickhouse/bin/clickhouse" GET_DATABASE_CURRENTLY_IN_PRODUCTION_SCRIPT="$PORTAL_HOME/scripts/get_database_currently_in_production.sh" MANAGE_DATABASE_TOOL_PROPERTIES_FILEPATH="$PORTAL_HOME/pipelines-credentials/manage_genie_database_update_tools.properties" CURRENT_DATABASE_OUTPUT_FILEPATH=$(mktemp $PORTAL_HOME/tmp/import-portal-users/get_current_database_output.txt.XXXXXX) @@ -29,11 +24,9 @@ MY_FLOCK_FILEPATH="/data/portal-cron/cron-lock/import-portal-users.lock" rm -f $CURRENT_DATABASE_OUTPUT_FILEPATH if [ ${current_production_database_color:0:5} == "green" ] ; then GENIE_PRODUCTION_DATABASE_PROPERTIES_FILENAME="$GENIE_GREEN_DATABASE_PROPERTIES_FILENAME" - GENIE_CLICKHOUSE_CLIENT_CONFIG_FILEPATH="$GENIE_GREEN_CLICKHOUSE_CLIENT_CONFIG_FILEPATH" else if [ ${current_production_database_color:0:4} == "blue" ] ; then GENIE_PRODUCTION_DATABASE_PROPERTIES_FILENAME="$GENIE_BLUE_DATABASE_PROPERTIES_FILENAME" - GENIE_CLICKHOUSE_CLIENT_CONFIG_FILEPATH="$GENIE_BLUE_CLICKHOUSE_CLIENT_CONFIG_FILEPATH" else echo "error : Failed to properly detect the current production database for genie" >> "$USERSGENIELOGFILENAME" fi @@ -41,20 +34,8 @@ MY_FLOCK_FILEPATH="/data/portal-cron/cron-lock/import-portal-users.lock" echo "### Starting import" >> "$USERSGENIELOGFILENAME" date >> "$USERSGENIELOGFILENAME" - $PYTHON_BINARY $PORTAL_HOME/scripts/importUsers.py --port 3306 --secrets-file $PIPELINES_CONFIG_HOME/google-docs/client_secrets.json --creds-file $PIPELINES_CONFIG_HOME/google-docs/creds.dat --properties-file $PIPELINES_CONFIG_HOME/properties/import-users/${GENIE_PRODUCTION_DATABASE_PROPERTIES_FILENAME} --send-email-confirm true --sender GENIE --ssl-ca $PORTAL_HOME/pipelines-credentials/pipelines-genie-db-aws-rds-combined-ca-bundle.pem --gmail-username $GMAIL_USERNAME --gmail-password $GMAIL_PASSWORD --smtp-server $SMTP_SERVER >> "$USERSGENIELOGFILENAME" 2>&1 + $PYTHON3_BINARY $PORTAL_HOME/scripts/importUsers.py --secrets-file $PIPELINES_CONFIG_HOME/google-docs/client_secrets.json --creds-file $PIPELINES_CONFIG_HOME/google-docs/creds.dat --properties-file $PIPELINES_CONFIG_HOME/properties/import-users/${GENIE_PRODUCTION_DATABASE_PROPERTIES_FILENAME} --send-email-confirm true --sender GENIE --gmail-username $GMAIL_USERNAME --gmail-password $GMAIL_PASSWORD --smtp-server $SMTP_SERVER >> "$USERSGENIELOGFILENAME" 2>&1 CGDS_GENIE_IMPORT_STATUS=$? - clickhouse_commands_filepath=$(ls ${CLICKHOUSE_COMMANDS_PENDING_DIRPATH}/*.sql 2>/dev/null | head -n 1) - if [ $CGDS_GENIE_IMPORT_STATUS -eq 0 ] && [ -f "$clickhouse_commands_filepath" ] ; then - echo "executing clickhouse commands from $clickhouse_commands_filepath" >> "$USERSGENIELOGFILENAME" - date >> "$USERSGENIELOGFILENAME" - "$CLICKHOUSE_BINARY_FILEPATH" client --config-file="$GENIE_CLICKHOUSE_CLIENT_CONFIG_FILEPATH" --multiquery < "$clickhouse_commands_filepath" - CLICKHOUSE_IMPORT_STATUS=$? - if [ $CLICKHOUSE_IMPORT_STATUS -eq 0 ] ; then - mv "$clickhouse_commands_filepath" "$CLICKHOUSE_COMMANDS_COMPLETED_DIRPATH" - else - $CGDS_GENIE_IMPORT_STATUS=2 # failure flag - fi - fi PIPELINES_EMAIL_LIST="cbioportal-pipelines@cbioportal.org" FAILED_DATABASES="" diff --git a/import-scripts/importUsers.py b/import-scripts/importUsers.py index 5b78b4648..75d76c16c 100755 --- a/import-scripts/importUsers.py +++ b/import-scripts/importUsers.py @@ -1,4 +1,4 @@ -#! /usr/bin/env python +#!/usr/bin/env python3 # ------------------------------------------------------------------------------ # Script which adds new users from google spreadsheet into the the cgds # user table. The following properties must be specified in portal.properties: @@ -23,9 +23,8 @@ # imports import os import sys -import time import getopt -import MySQLdb +import clickhouse_connect import re import datetime @@ -37,30 +36,25 @@ from oauth2client.client import flow_from_clientsecrets from oauth2client.tools import run_flow, argparser -from email.MIMEMultipart import MIMEMultipart -from email.MIMEBase import MIMEBase -from email.MIMEText import MIMEText -from email.Header import Header -from email.Utils import COMMASPACE, formatdate -from email import Encoders +from email.mime.multipart import MIMEMultipart +from email.mime.text import MIMEText +from email.header import Header +from email.utils import COMMASPACE, formatdate from googleapiclient.discovery import build # ------------------------------------------------------------------------------ # globals -# hardcoded file path for clickhouse -UNIX_EPOC_SECONDS = str(int(time.time())) -CLICKHOUSE_COMMANDS_FILEPATH = '/data/portal-cron/tmp/import-users-genie/pending_clickhouse_commands/%s.sql' % UNIX_EPOC_SECONDS - # some file descriptors ERROR_FILE = sys.stderr OUTPUT_FILE = sys.stdout # fields in portal.properties -CGDS_DATABASE_HOST = 'db.host' -CGDS_DATABASE_NAME = 'db.portal_db_name' -CGDS_DATABASE_USER = 'db.user' -CGDS_DATABASE_PW = 'db.password' +CLICKHOUSE_HOST = 'clickhouse.host' +CLICKHOUSE_PORT = 'clickhouse.port' +CLICKHOUSE_USER = 'clickhouse.user' +CLICKHOUSE_PW = 'clickhouse.password' +CLICKHOUSE_DB = 'clickhouse.db' GOOGLE_ID = 'google.id' GOOGLE_PW = 'google.pw' CGDS_USERS_SPREADSHEET = 'users.spreadsheet' @@ -112,12 +106,13 @@ class PortalProperties(object): def __init__(self, - cgds_database_host, cgds_database_name, cgds_database_user, cgds_database_pw, - google_id, google_pw, google_spreadsheet, google_worksheet,google_importer_spreadsheet): - self.cgds_database_host = cgds_database_host - self.cgds_database_name = cgds_database_name - self.cgds_database_user = cgds_database_user - self.cgds_database_pw = cgds_database_pw + clickhouse_host, clickhouse_port, clickhouse_user, clickhouse_pw, clickhouse_db, + google_id, google_pw, google_spreadsheet, google_worksheet, google_importer_spreadsheet): + self.clickhouse_host = clickhouse_host + self.clickhouse_port = clickhouse_port + self.clickhouse_user = clickhouse_user + self.clickhouse_pw = clickhouse_pw + self.clickhouse_db = clickhouse_db self.google_id = google_id self.google_pw = google_pw self.google_spreadsheet = google_spreadsheet @@ -141,7 +136,7 @@ def __init__(self, inst_email, google_email, name, enabled, authorities): def send_mail(to, subject, body, gmail_username, gmail_password, sender=MESSAGE_FROM_CMO, bcc=MESSAGE_BCC_CMO, server=None): if server is None: - print >> ERROR_FILE, 'smtp server must be specified' + print('smtp server must be specified', file=ERROR_FILE) sys.exit(2) assert type(to)==list @@ -216,10 +211,10 @@ def get_sheet_records(client, ss, ws): new_record[header[index]] = None sheet_records.append(new_record) except Exception as e: - print >> ERROR_FILE, "There was an error connecting to google." - print >> ERROR_FILE, e + print("There was an error connecting to google.", file=ERROR_FILE) + print(e, file=ERROR_FILE) exit(0) - + return sheet_records # ------------------------------------------------------------------------------ @@ -233,58 +228,32 @@ def get_spreadsheet_title(client, ss): data = response.get('properties', {}) spreadsheet_title = data["title"] except Exception as e: - print >> ERROR_FILE, "There was an error connecting to google." - print >> ERROR_FILE, e + print("There was an error connecting to google.", file=ERROR_FILE) + print(e, file=ERROR_FILE) exit(0) return spreadsheet_title # ------------------------------------------------------------------------------ # insert new users into table - this list does not contain users already in table -def append_command_to_clickhouse_file(sql_command_string): - with open(CLICKHOUSE_COMMANDS_FILEPATH, "a") as clickhouse_sql_file: - clickhouse_sql_file.write(sql_command_string) - -def insert_new_users(cursor, new_user_list): - # list of emails for users which returned an error when inserting into database - emails_to_remove = [] - added_user_triples = [] - added_user_authority_duples = [] +def insert_new_users(ch_client, new_user_list): + added_user_rows = [] + added_authority_rows = [] for user in new_user_list: - print >> OUTPUT_FILE, "new user: %s" % user.google_email; - try: - user_name = user.name - if isinstance(user_name, unicode): - user_name = user_name.encode('utf-8') - user_email_escaped=user.google_email.lower().replace('\'', '\\\'') - user_triple = (user_email_escaped, user_name, user.enabled) - sql_command_string = "insert into users values ('%s', '%s', '%s')" % user_triple - cursor.execute(sql_command_string) - added_user_triples.append(user_triple) - # authorities is semicolon delimited - authorities = user.authorities - cursor.executemany("insert into authorities values (%s, %s)", [(user_email_escaped, authority) for authority in authorities]) - added_user_authority_duples += ([(user_email_escaped, authority) for authority in authorities]) - except MySQLdb.Error, msg: - print >> OUTPUT_FILE, msg - print >> OUTPUT_FILE, "Removing user: %s" % user_name - print >> ERROR_FILE, msg - emails_to_remove.append(user.google_email.lower()) - # output commands to be executed on clickhouse sever - if len(added_user_triples) > 0: - values_string_items = ["('%s','%s','%s')" % triple for triple in added_user_triples] - sql_command_string = "INSERT INTO users VALUES %s;" % ",".join(values_string_items) - append_command_to_clickhouse_file(sql_command_string) - if len(added_user_authority_duples) > 0: - values_string_items = ["('%s','%s')" % duple for duple in added_user_authority_duples] - sql_command_string = "INSERT INTO authorities VALUES %s;" % ",".join(values_string_items) - append_command_to_clickhouse_file(sql_command_string) - return emails_to_remove + print("new user: %s" % user.google_email, file=OUTPUT_FILE) + user_name = user.name + user_email = user.google_email.lower() + added_user_rows.append([user_email, user_name, user.enabled]) + added_authority_rows += [[user_email, authority] for authority in user.authorities] + if added_user_rows: + ch_client.insert('users', added_user_rows, column_names=['email', 'name', 'enabled']) + if added_authority_rows: + ch_client.insert('authorities', added_authority_rows, column_names=['email', 'authority']) # ------------------------------------------------------------------------------ # get current users from database -def get_current_user_map(cursor): +def get_current_user_map(ch_client): # map that we are returning # key is the email address of the user (primary key) and value is a User object @@ -293,34 +262,15 @@ def get_current_user_map(cursor): # recall each tuple in user table is ['EMAIL', 'NAME', 'ENABLED'] & # no tuple can contain nulls try: - cursor.execute('select * from users') - for row in cursor.fetchall(): + result = ch_client.query('SELECT * FROM users') + for row in result.result_rows: to_return[row[0].lower()] = User(row[0].lower(), row[0].lower(), row[1], row[2], 'not_used_here') - except MySQLdb.Error, msg: - print >> ERROR_FILE, msg + except Exception as msg: + print(msg, file=ERROR_FILE) return None return to_return -# ------------------------------------------------------------------------------ -# get current user authorities - -def get_user_authorities(cursor, google_email): - - # list of authorities (cancer studies) we are returning -- as a set - to_return = [] - - # recall each tuple in authorities table is ['EMAIL', 'AUTHORITY'] - # no tuple can contain nulls - try: - cursor.execute('select * from authorities where email = (%s)', [google_email]) - for row in cursor.fetchall(): - to_return.append(row[1]) - except MySQLdb.Error, msg: - print >> ERROR_FILE, msg - return None - - return to_return # ------------------------------------------------------------------------------ # get current users from google spreadsheet @@ -399,33 +349,24 @@ def get_rejected_user_map(spreadsheet, sheet_records, current_user_map, portal_n else: to_return[google_email.lower()] = User(inst_email, google_email, name, 0, [portal_name + ':' + au for au in authorities.split(';')]) - print >> OUTPUT_FILE, 'Rejected user added to list: %s' % google_email.lower() return to_return # ------------------------------------------------------------------------------ -# get db connection +# get clickhouse client -def get_db_connection(portal_properties, port, ssl_ca_filename=None): - - # try and create a connection to the db +def get_clickhouse_client(portal_properties): try: - if ssl_ca_filename: - connection = MySQLdb.connect(host=portal_properties.cgds_database_host, port=int(port), - user=portal_properties.cgds_database_user, - passwd=portal_properties.cgds_database_pw, - db=portal_properties.cgds_database_name, - ssl={'ca': ssl_ca_filename}) - else: - connection = MySQLdb.connect(host=portal_properties.cgds_database_host, port=int(port), - user=portal_properties.cgds_database_user, - passwd=portal_properties.cgds_database_pw, - db=portal_properties.cgds_database_name) - except MySQLdb.Error, msg: - print >> ERROR_FILE, msg + return clickhouse_connect.get_client( + host=portal_properties.clickhouse_host, + port=int(portal_properties.clickhouse_port), + username=portal_properties.clickhouse_user, + password=portal_properties.clickhouse_pw, + database=portal_properties.clickhouse_db, + ) + except Exception as msg: + print(msg, file=ERROR_FILE) return None - return connection - # ------------------------------------------------------------------------------ # parse portal.properties @@ -441,33 +382,35 @@ def get_portal_properties(portal_properties_filename): continue # store name/value property = line.split('=') - # spreadsheet url contains an '=' sign - if line.startswith(CGDS_USERS_SPREADSHEET): + # spreadsheet url / db pw may contain an '=' sign + if line.startswith(CGDS_USERS_SPREADSHEET) or line.startswith(CLICKHOUSE_PW): property = [property[0], line[line.index('=')+1:len(line)]] if (len(property) != 2): - print >> ERROR_FILE, 'Skipping invalid entry in property file: ' + line + print('Skipping invalid entry in property file: ' + line, file=ERROR_FILE) continue properties[property[0]] = property[1].strip() portal_properties_file.close() # error check - if (CGDS_DATABASE_HOST not in properties or len(properties[CGDS_DATABASE_HOST]) == 0 or - CGDS_DATABASE_NAME not in properties or len(properties[CGDS_DATABASE_NAME]) == 0 or - CGDS_DATABASE_USER not in properties or len(properties[CGDS_DATABASE_USER]) == 0 or - CGDS_DATABASE_PW not in properties or len(properties[CGDS_DATABASE_PW]) == 0 or + if (CLICKHOUSE_HOST not in properties or len(properties[CLICKHOUSE_HOST]) == 0 or + CLICKHOUSE_PORT not in properties or len(properties[CLICKHOUSE_PORT]) == 0 or + CLICKHOUSE_USER not in properties or len(properties[CLICKHOUSE_USER]) == 0 or + CLICKHOUSE_PW not in properties or len(properties[CLICKHOUSE_PW]) == 0 or + CLICKHOUSE_DB not in properties or len(properties[CLICKHOUSE_DB]) == 0 or GOOGLE_ID not in properties or len(properties[GOOGLE_ID]) == 0 or GOOGLE_PW not in properties or len(properties[GOOGLE_PW]) == 0 or CGDS_USERS_SPREADSHEET not in properties or len(properties[CGDS_USERS_SPREADSHEET]) == 0 or CGDS_USERS_WORKSHEET not in properties or len(properties[CGDS_USERS_WORKSHEET]) == 0 or IMPORTER_SPREADSHEET not in properties or len(properties[IMPORTER_SPREADSHEET]) == 0): - print >> ERROR_FILE, 'Missing one or more required properties, please check property file' + print('Missing one or more required properties, please check property file', file=ERROR_FILE) return None # return an instance of PortalProperties - return PortalProperties(properties[CGDS_DATABASE_HOST], - properties[CGDS_DATABASE_NAME], - properties[CGDS_DATABASE_USER], - properties[CGDS_DATABASE_PW], + return PortalProperties(properties[CLICKHOUSE_HOST], + properties[CLICKHOUSE_PORT], + properties[CLICKHOUSE_USER], + properties[CLICKHOUSE_PW], + properties[CLICKHOUSE_DB], properties[GOOGLE_ID], properties[GOOGLE_PW], properties[CGDS_USERS_SPREADSHEET], @@ -478,49 +421,62 @@ def get_portal_properties(portal_properties_filename): # adds new users from the google spreadsheet into the cgds portal database # returns new user map if users have been inserted, None otherwise -def manage_users(client, spreadsheet, cursor, sheet_records, portal_name): +def manage_users(client, spreadsheet, ch_client, sheet_records, portal_name): # get map of current portal users - print >> OUTPUT_FILE, 'Getting list of current portal users' - current_user_map = get_current_user_map(cursor) + print('Getting list of current portal users', file=OUTPUT_FILE) + current_user_map = get_current_user_map(ch_client) if current_user_map is not None: - print >> OUTPUT_FILE, 'We have found %s current portal users' % len(current_user_map) + print('We have found %s current portal users' % len(current_user_map), file=OUTPUT_FILE) else: - print >> OUTPUT_FILE, 'Error reading user table' - return None, None, None + print('Error reading user table', file=OUTPUT_FILE) + return None, None # get list of new users and insert - print >> OUTPUT_FILE, 'Checking for new users' + print('Checking for new users', file=OUTPUT_FILE) new_user_map = get_new_user_map(spreadsheet, sheet_records, current_user_map, portal_name) rejected_user_map = get_rejected_user_map(spreadsheet, sheet_records, current_user_map, portal_name) - + if (len(new_user_map) > 0): - print >> OUTPUT_FILE, 'We have %s new user(s) to add' % len(new_user_map) - emails_to_remove = insert_new_users(cursor, new_user_map.values()) - return new_user_map, rejected_user_map, emails_to_remove + print('We have %s new user(s) to add' % len(new_user_map), file=OUTPUT_FILE) + insert_new_users(ch_client, new_user_map.values()) + return new_user_map, rejected_user_map else: - print >> OUTPUT_FILE, 'No new users to insert, exiting' - return None, rejected_user_map, None + print('No new users to insert, exiting', file=OUTPUT_FILE) + return None, rejected_user_map # ------------------------------------------------------------------------------ # updates user study access -def update_user_authorities(spreadsheet, cursor, sheet_records, portal_name): - - # get map of current portal users - print >> OUTPUT_FILE, 'Getting list of current portal users from spreadsheet' - all_user_map = get_new_user_map(spreadsheet, sheet_records, {}, portal_name) - if all_user_map is None: - return None; - print >> OUTPUT_FILE, 'Updating authorities for each user in current portal user list' - for user in all_user_map.values(): - sheet_authorities = set(user.authorities) - db_authorities = set(get_user_authorities(cursor, user.google_email)) - try: - cursor.executemany("insert into authorities values(%s, %s)", - [(user.google_email, authority) for authority in sheet_authorities - db_authorities]) - except MySQLdb.Error, msg: - print >> ERROR_FILE, msg +def update_user_authorities(spreadsheet, ch_client, sheet_records, portal_name): + + # get map of current portal users + print('Getting list of current portal users from spreadsheet', file=OUTPUT_FILE) + all_user_map = get_new_user_map(spreadsheet, sheet_records, {}, portal_name) + if all_user_map is None: + return None + total = len(all_user_map) + print('Fetching existing authorities for %d user(s) in batch' % total, file=OUTPUT_FILE) + emails = list(all_user_map.keys()) + result = ch_client.query('SELECT email, authority FROM authorities WHERE email IN {emails:Array(String)}', + parameters={'emails': emails}) + db_authorities_map = {} + for row in result.result_rows: + db_authorities_map.setdefault(row[0].lower(), set()).add(row[1]) + + new_authority_pairs = [] + for user in all_user_map.values(): + sheet_authorities = set(user.authorities) + db_authorities = db_authorities_map.get(user.google_email, set()) + added = [(user.google_email, authority) for authority in sheet_authorities - db_authorities] + if added: + print(' %s: adding %d new authority(s)' % (user.google_email, len(added)), file=OUTPUT_FILE) + new_authority_pairs += added + if new_authority_pairs: + print('Inserting %d new authority pair(s) into ClickHouse' % len(new_authority_pairs), file=OUTPUT_FILE) + ch_client.insert('authorities', new_authority_pairs, column_names=['email', 'authority']) + else: + print('No new authorities to insert', file=OUTPUT_FILE) # ------------------------------------------------------------------------------ # adds rejected user emails to rejected_users worksheet in an idempotent fashion @@ -529,7 +485,7 @@ def add_rejected_users_to_worksheet(rejected_user_map, google_spreadsheet, clien if rejected_user_map is None or len(rejected_user_map) == 0: return - print >> OUTPUT_FILE, 'Adding rejected users to rejected_users worksheet' + print('Adding rejected users to rejected_users worksheet', file=OUTPUT_FILE) # get existing records from the rejected_users worksheet # Note: get_sheet_records converts column names to lowercase with special chars removed @@ -543,23 +499,22 @@ def add_rejected_users_to_worksheet(rejected_user_map, google_spreadsheet, clien existing_emails.add(record[REJECTED_EMAIL_KEY].strip().lower()) except Exception as e: # worksheet might not exist or be empty, start with empty set - print >> OUTPUT_FILE, 'Creating new rejected_users worksheet or worksheet is empty' + print('Creating new rejected_users worksheet or worksheet is empty', file=OUTPUT_FILE) existing_emails = set() # remove any rejected users that are already in the worksheet - for user_email in rejected_user_map.keys(): + for user_email in list(rejected_user_map.keys()): if user_email.lower() in existing_emails: rejected_user_map.pop(user_email, None) # prepare new rows to append (only users not already in the worksheet) new_rows = [] - import datetime current_utc_time = datetime.datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S') for user_email in rejected_user_map.keys(): if user_email.lower() not in existing_emails: new_rows.append([user_email, current_utc_time]) - print >> OUTPUT_FILE, 'Adding rejected user to worksheet: %s' % user_email + print('Adding rejected user to worksheet: %s' % user_email, file=OUTPUT_FILE) # append new rows to the worksheet if there are any if len(new_rows) > 0: @@ -575,11 +530,11 @@ def add_rejected_users_to_worksheet(rejected_user_map, google_spreadsheet, clien valueInputOption='RAW', body=body ).execute() - print >> OUTPUT_FILE, 'Added %s rejected user(s) to worksheet' % len(new_rows) + print('Added %s rejected user(s) to worksheet' % len(new_rows), file=OUTPUT_FILE) except Exception as e: - print >> ERROR_FILE, 'Error adding rejected users to worksheet: %s' % str(e) + print('Error adding rejected users to worksheet: %s' % str(e), file=ERROR_FILE) else: - print >> OUTPUT_FILE, 'No new rejected users to add to worksheet' + print('No new rejected users to add to worksheet', file=OUTPUT_FILE) # ------------------------------------------------------------------------------ # sends emails to users from a user map @@ -602,8 +557,8 @@ def send_emails(user_map, google_spreadsheet, client, worksheet, gmail_username, error_subject = ERROR_EMAIL_SUBJECT_CMO error_body = ERROR_EMAIL_BODY_CMO if emails_to_remove is None or user_key not in emails_to_remove: - print >> OUTPUT_FILE, ('Sending confirmation or rejection email to user: %s at %s' % - (user.name, user.inst_email)) + print('Sending confirmation or rejection email to user: %s at %s' % + (user.name, user.inst_email), file=OUTPUT_FILE) send_mail([user.inst_email], subject, body, gmail_username, gmail_password, sender=from_field, bcc=bcc_field, server=smtp_server) else: send_mail([user_key], error_subject, error_body, gmail_username, gmail_password, sender=from_field, bcc=bcc_field, server=smtp_server) @@ -614,7 +569,7 @@ def send_emails(user_map, google_spreadsheet, client, worksheet, gmail_username, def get_email_parameters(google_spreadsheet,client,worksheet): subject = '' body = '' - print >> OUTPUT_FILE, 'Getting email parameters from google spreadsheet' + print('Getting email parameters from google spreadsheet', file=OUTPUT_FILE) email_sheet_records = get_sheet_records(client, google_spreadsheet, worksheet) for record in email_sheet_records: if record[SUBJECT_KEY] is not None and record[BODY_KEY] is not None: @@ -624,7 +579,7 @@ def get_email_parameters(google_spreadsheet,client,worksheet): def get_portal_name_map(google_spreadsheet,client): portal_name = {} - print >> OUTPUT_FILE, 'Getting access control parameter from google spreadsheet' + print('Getting access control parameter from google spreadsheet', file=OUTPUT_FILE) access_control_sheet = get_sheet_records(client,google_spreadsheet,ACCESS_CONTROL_WORKSHEET) for row in access_control_sheet: if row[PORTAL_NAME_KEY] is not None and row[SPREADSHEET_NAME_KEY] is not None: @@ -632,23 +587,20 @@ def get_portal_name_map(google_spreadsheet,client): return portal_name -def establish_new_db_connection(portal_properties, port, ssl_ca_filename): - # get db connection & create cursor - print >> OUTPUT_FILE, 'Connecting to database: ' + portal_properties.cgds_database_name - connection = get_db_connection(portal_properties, port, ssl_ca_filename) - if connection is not None: - cursor = connection.cursor() - else: - print >> OUTPUT_FILE, 'Error connecting to database, exiting' +def establish_clickhouse_client(portal_properties): + print('Connecting to ClickHouse: ' + portal_properties.clickhouse_host, file=OUTPUT_FILE) + ch_client = get_clickhouse_client(portal_properties) + if ch_client is None: + print('Error connecting to ClickHouse, exiting', file=OUTPUT_FILE) sys.exit(2) - return (connection, cursor) + return ch_client # ------------------------------------------------------------------------------ # displays program usage (invalid args) def usage(): - print >> OUTPUT_FILE, 'importUsers.py --secrets-file [google secrets.json] --creds-file [oauth creds filename] --properties-file [properties file] --send-email-confirm [true or false] --use-institutional-id [true or false] --port [mysql port number] --sender [sender identifier - optional] --ssl-ca [ssl certificate file - optional] --smtp-server [smtp server hostname - required when send-email-confirm is true]' + print('importUsers.py --secrets-file [google secrets.json] --creds-file [oauth creds filename] --properties-file [properties file] --send-email-confirm [true or false] --use-institutional-id [true or false] --sender [sender identifier - optional] --smtp-server [smtp server hostname - required when send-email-confirm is true]', file=OUTPUT_FILE) # ------------------------------------------------------------------------------ # the big deal main. @@ -657,9 +609,9 @@ def main(): # parse command line options try: - opts, args = getopt.getopt(sys.argv[1:], '', ['secrets-file=', 'creds-file=', 'properties-file=', 'ssl-ca=', 'send-email-confirm=', 'use-institutional-id=', 'port=', 'sender=', 'gmail-username=', 'gmail-password=', 'smtp-server=']) - except getopt.error, msg: - print >> ERROR_FILE, msg + opts, args = getopt.getopt(sys.argv[1:], '', ['secrets-file=', 'creds-file=', 'properties-file=', 'send-email-confirm=', 'use-institutional-id=', 'sender=', 'gmail-username=', 'gmail-password=', 'smtp-server=']) + except getopt.error as msg: + print(msg, file=ERROR_FILE) usage() sys.exit(2) @@ -668,9 +620,7 @@ def main(): creds_filename = '' properties_filename = '' send_email_confirm = '' - port = '' sender = '' - ssl_ca_filename = '' # not required gmail_username = '' gmail_password = '' smtp_server = '' @@ -686,18 +636,14 @@ def main(): gmail_password = a elif o == '--properties-file': properties_filename = a - elif o == '--ssl-ca': - ssl_ca_filename = a elif o == '--send-email-confirm': send_email_confirm = a elif o == '--sender': sender = a - elif o == '--port': - port = a elif o == '--smtp-server': smtp_server = a - if (secrets_filename == '' or creds_filename == '' or properties_filename == '' or send_email_confirm == '' or port == '' or + if (secrets_filename == '' or creds_filename == '' or properties_filename == '' or send_email_confirm == '' or (send_email_confirm != 'true' and send_email_confirm != 'false') or (send_email_confirm == 'true' and (gmail_username == '' or gmail_password == '' or smtp_server == ''))): usage() @@ -705,14 +651,14 @@ def main(): # check existence of file if not os.path.exists(properties_filename): - print >> ERROR_FILE, 'properties file cannot be found: ' + properties_filename + print('properties file cannot be found: ' + properties_filename, file=ERROR_FILE) sys.exit(2) # parse/get relevant portal properties - print >> OUTPUT_FILE, 'Reading portal properties file: ' + properties_filename + print('Reading portal properties file: ' + properties_filename, file=OUTPUT_FILE) portal_properties = get_portal_properties(properties_filename) if not portal_properties: - print >> OUTPUT_FILE, 'Error reading %s, exiting' % properties_filename + print('Error reading %s, exiting' % properties_filename, file=OUTPUT_FILE) return # create client for interacting with google sheets api @@ -720,31 +666,26 @@ def main(): # connect to importer configuration spreadsheet and get mapping of spreadsheet to portal name portal_name_map = get_portal_name_map(portal_properties.google_importer_spreadsheet,client) + ch_client = establish_clickhouse_client(portal_properties) + google_spreadsheets = portal_properties.google_spreadsheet.split(';') for google_spreadsheet in google_spreadsheets: if not google_spreadsheet == '': - (connection, cursor) = establish_new_db_connection(portal_properties, port, ssl_ca_filename) - sheet_records = get_sheet_records(client, google_spreadsheet, - portal_properties.google_worksheet) + portal_properties.google_worksheet) spreadsheet_title = get_spreadsheet_title(client, google_spreadsheet) - - print >> OUTPUT_FILE, 'Importing ' + spreadsheet_title + ' ...' + + print('Importing ' + spreadsheet_title + ' ...', file=OUTPUT_FILE) app_name = portal_name_map[spreadsheet_title] - + # the 'guts' of the script # note: original script depended on one to one mapping of spreadsheet to app name - and lookup was by spreadsheet # with a now decommissioned app (genie-archive) we wanted to be able to do one to many mapping (one spreadsheet to multiple apps) # to fit this logic would have to rework how we specify properties or introduce new column (db name) as index but might have other effects - new_user_map, rejected_user_map, emails_to_remove = manage_users(client, google_spreadsheet, cursor, sheet_records, app_name) - - # update user authorities - update_user_authorities(google_spreadsheet, cursor, sheet_records, app_name) + new_user_map, rejected_user_map = manage_users(client, google_spreadsheet, ch_client, sheet_records, app_name) - # commit changes before moving on to next spreadsheet - cursor.close() - connection.commit() - connection.close() + # update user authorities + update_user_authorities(google_spreadsheet, ch_client, sheet_records, app_name) # add the emails from rejected_user_map to rejected_users worksheet in an idempotent fashion # also remove any emails from rejected_user_map that already exist in the worksheet-- @@ -753,9 +694,9 @@ def main(): # sending emails if send_email_confirm == 'true': - print >> OUTPUT_FILE, "Sending confirmation emails to new users" - send_emails(new_user_map, google_spreadsheet, client, IMPORT_EMAIL_WORKSHEET, gmail_username, gmail_password, sender, smtp_server, emails_to_remove) - print >> OUTPUT_FILE, "Sending rejection emails to newly rejected users" + print("Sending confirmation emails to new users", file=OUTPUT_FILE) + send_emails(new_user_map, google_spreadsheet, client, IMPORT_EMAIL_WORKSHEET, gmail_username, gmail_password, sender, smtp_server) + print("Sending rejection emails to newly rejected users", file=OUTPUT_FILE) send_emails(rejected_user_map, google_spreadsheet, client, REJECT_EMAIL_WORKSHEET, gmail_username, gmail_password, sender, smtp_server) # ------------------------------------------------------------------------------