Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 10 additions & 22 deletions dags/import_clickhouse_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,19 +196,6 @@ def send_update_notification(notification_filepath: str, ssh_conn_id: str) -> No
scripts_dir,
db_properties_filepath,
),
"create_derived_tables": _script(
scripts_dir,
"airflow-create-derived-tables.sh",
importer,
scripts_dir,
db_properties_filepath,
),
"set_import_complete": _script(
scripts_dir,
"set_update_process_state.sh",
db_properties_filepath,
"complete",
),
"fetch_data": _script(
scripts_dir,
"data_source_repo_clone_manager.sh",
Expand All @@ -225,7 +212,6 @@ def send_update_notification(notification_filepath: str, ssh_conn_id: str) -> No
db_properties_filepath,
),
# reuse the old import-sql script for now
# TODO: we might need to update the send_update_notification code here
"import_direct_to_clickhouse": _script(
scripts_dir,
"airflow-import-sql.sh",
Expand All @@ -234,24 +220,26 @@ def send_update_notification(notification_filepath: str, ssh_conn_id: str) -> No
db_properties_filepath,
notification_filepath,
),
"transfer_deployment": _script(
"create_derived_tables": _script(
scripts_dir,
"airflow-transfer-deployment.sh",
"airflow-create-derived-tables.sh",
importer,
scripts_dir,
db_properties_filepath,
color_swap_config_filepath,
),
"clear_persistence_caches": _script(
"check_data_integrity": _script(
scripts_dir,
"airflow-clear-persistence-caches.sh",
"airflow-check-data-integrity.sh",
importer,
scripts_dir,
db_properties_filepath,
),
"set_import_running": _script(
"transfer_deployment": _script(
scripts_dir,
"airflow-transfer-deployment.sh",
scripts_dir,
"set_update_process_state.sh",
db_properties_filepath,
"running",
color_swap_config_filepath,
),
"set_import_abandoned": _script(
scripts_dir,
Expand Down
56 changes: 0 additions & 56 deletions dags/import_msk_dag.py

This file was deleted.

47 changes: 0 additions & 47 deletions dags/import_review_dag.py

This file was deleted.

5 changes: 4 additions & 1 deletion dags/import_triage_clickhouse_dag.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,9 @@ def _wire(tasks: dict[str, object]) -> None:

tasks["import_direct_to_clickhouse"] >> tasks["create_derived_tables"]

tasks["create_derived_tables"] >> tasks["transfer_deployment"]
tasks["create_derived_tables"] >> tasks["check_data_integrity"]

tasks["check_data_integrity"] >> tasks["transfer_deployment"]

tasks["transfer_deployment"] >> [
tasks["cleanup_data"],
Expand All @@ -53,6 +55,7 @@ def _wire(tasks: dict[str, object]) -> None:
"setup_import",
"import_direct_to_clickhouse",
"create_derived_tables",
"check_data_integrity",
"transfer_deployment",
#"clear_persistence_caches",
"send_update_notification",
Expand Down
67 changes: 0 additions & 67 deletions dags/import_triage_dag.py

This file was deleted.

43 changes: 43 additions & 0 deletions import-scripts/airflow-check-data-integrity.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#!/bin/bash
#
# Runs the ClickHouse constraint check (CheckClickHouseConstraints) against the
# importer jar for the database color that is NOT currently in production, so
# that a failed check can stop the pipeline before the deployment is transferred.
#
# Usage:
#   airflow-check-data-integrity.sh <importer> <scripts_dir> <db_properties_filepath>
#
#   <importer>               importer / portal database name (stored in PORTAL_DATABASE)
#   <scripts_dir>            directory holding automation-environment.sh and
#                            get_database_currently_in_production.sh
#                            (defaults to /data/portal-cron/scripts when empty)
#   <db_properties_filepath> properties file passed to
#                            get_database_currently_in_production.sh
#
# Exit status: 0 when the integrity check passes, 1 on any failure.

PORTAL_DATABASE=$1
PORTAL_SCRIPTS_DIRECTORY=$2
MANAGE_DATABASE_TOOL_PROPERTIES_FILEPATH=$3

if [ -z "$PORTAL_SCRIPTS_DIRECTORY" ]; then
    PORTAL_SCRIPTS_DIRECTORY="/data/portal-cron/scripts"
fi
AUTOMATION_ENV_SCRIPT_FILEPATH="${PORTAL_SCRIPTS_DIRECTORY}/automation-environment.sh"
if [ ! -f "$AUTOMATION_ENV_SCRIPT_FILEPATH" ] ; then
    echo "$(date): Unable to locate $AUTOMATION_ENV_SCRIPT_FILEPATH, exiting..."
    exit 1
fi
# JAVA_BINARY is not set in this script; presumably it is exported by the
# sourced environment script -- TODO confirm.
source "$AUTOMATION_ENV_SCRIPT_FILEPATH"

# The jar path below is built from IMPORTER_NAME, which this script never
# assigned. Keep any value supplied by the environment, otherwise fall back to
# the importer name passed as the first argument.
# NOTE(review): the example jar name below ("genie-aws-importer-blue.jar")
# suggests the name may need an "-aws" suffix for some importers -- confirm the
# naming convention against the files in /data/portal-cron/lib.
if [ -z "$IMPORTER_NAME" ] ; then
    IMPORTER_NAME="$PORTAL_DATABASE"
fi
if [ -z "$IMPORTER_NAME" ] ; then
    echo "Error: importer name was not provided (first argument)" >&2
    exit 1
fi

# Get the current production database color; the check runs against the other one.
GET_DB_IN_PROD_SCRIPT_FILEPATH="${PORTAL_SCRIPTS_DIRECTORY}/get_database_currently_in_production.sh"
current_production_database_color=$(sh "$GET_DB_IN_PROD_SCRIPT_FILEPATH" "$MANAGE_DATABASE_TOOL_PROPERTIES_FILEPATH")
destination_database_color="unset"
# Prefix match (rather than equality) so trailing output after the color name
# is tolerated; an empty value matches nothing and falls through to the error.
case "$current_production_database_color" in
    blue*)
        destination_database_color="green"
        ;;
    green*)
        destination_database_color="blue"
        ;;
esac
if [ "$destination_database_color" == "unset" ] ; then
    echo "Error during determination of the destination database color" >&2
    exit 1
fi

# eg. genie-aws-importer-blue.jar
IMPORTER_JAR_FILENAME="/data/portal-cron/lib/${IMPORTER_NAME}-importer-${destination_database_color}.jar"
if [ ! -f "$IMPORTER_JAR_FILENAME" ] ; then
    # Fail with a clear message instead of an opaque java classpath error.
    echo "Error: importer jar $IMPORTER_JAR_FILENAME not found" >&2
    exit 1
fi

# Build the argument list as an array so the jar path survives word splitting.
integrity_check_args=(
    -cp "$IMPORTER_JAR_FILENAME"
    org.mskcc.cbio.portal.scripts.CheckClickHouseConstraints
)

if ! "$JAVA_BINARY" "${integrity_check_args[@]}" ; then
    echo "Error: Integrity check failed! Will not transfer deployment" >&2
    exit 1
fi
23 changes: 0 additions & 23 deletions import-scripts/airflow-clear-persistence-caches.sh

This file was deleted.

3 changes: 0 additions & 3 deletions import-scripts/airflow-create-derived-tables.sh
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,6 @@ fi
if [ "$PORTAL_DATABASE" == "genie" ] ; then
clickhouse_schema_branch_name="genie-portal-db-clickhouse-sql-for-import"
fi
if [ "$PORTAL_DATABASE" == "msk" ] ; then
clickhouse_schema_branch_name="msk-portal-db-clickhouse-sql-for-import"
fi
if [ "$PORTAL_DATABASE" == "triage-clickhouse" ]; then
clickhouse_schema_branch_name="triage-portal-db-clickhouse-sql-for-import"
fi
Expand Down
3 changes: 0 additions & 3 deletions import-scripts/airflow-import-clickhouse.sh
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,6 @@ fi
if [ "$PORTAL_DATABASE" == "genie" ] ; then
clickhouse_schema_branch_name="genie-portal-db-clickhouse-sql-for-import"
fi
if [ "$PORTAL_DATABASE" == "msk" ] ; then
clickhouse_schema_branch_name="msk-portal-db-clickhouse-sql-for-import"
fi
if ! $DOWNLOAD_DERVIED_TABLE_SQL_FILES_SCRIPT_FILEPATH --github_branch_name "$clickhouse_schema_branch_name" "$derived_table_sql_script_dirpath" ; then
echo "Error during download of derived table construction .sql files from github" >&2
exit 1
Expand Down
Loading