11#! /usr/bin/env bash
22
33COHORT=$1
4- TMP_DIR=" /data/portal-cron/tmp/preconsume_problematic_samples"
4+ FETCH_NUM=1
5+ TMP_DIR=" /data/portal-cron/tmp/preconsume_problematic_samples/${COHORT} "
56CVR_FETCH_PROPERTIES_FILEPATH=" /data/portal-cron/git-repos/pipelines-configuration/properties/fetch-cvr/application.properties"
67CVR_USERNAME=$( grep ' dmp.user_name' ${CVR_FETCH_PROPERTIES_FILEPATH} | head -n 1 | sed -E s/[^= ][^= ]* =//)
78CVR_PASSWORD=$( grep ' dmp.password' ${CVR_FETCH_PROPERTIES_FILEPATH} | head -n 1 | sed -E s/[^= ][^= ]* =//)
@@ -13,11 +14,11 @@ CVR_HEME_FETCH_URL_PREFIX="${CVR_TUMOR_SERVER}cbio_retrieve_heme_variants"
1314CVR_ARCHER_FETCH_URL_PREFIX=" ${CVR_TUMOR_SERVER} cbio_archer_retrieve_variants"
1415CVR_ACCESS_FETCH_URL_PREFIX=" ${CVR_TUMOR_SERVER} cbio_retrieve_access_variants"
1516CVR_CONSUME_SAMPLE_URL_PREFIX=" ${CVR_TUMOR_SERVER} cbio_consume_sample"
16- FETCH_OUTPUT_FILEPATH=" $TMP_DIR /cvr_data_ ${COHORT} .json "
17- CONSUME_IDS_FILEPATH=" $TMP_DIR /${COHORT} _consume .ids"
18- PROBLEMATIC_EVENT_CONSUME_IDS_FILEPATH=" $TMP_DIR /problematic_event_consume_ ${COHORT} .ids"
19- PROBLEMATIC_METADATA_CONSUME_IDS_FILEPATH=" $TMP_DIR /problematic_metadata_consume_ ${COHORT} .ids"
20- CONSUME_ATTEMPT_OUTPUT_FILEPATH=" $TMP_DIR /consume_attempt_output_ ${COHORT} .json"
17+ FETCH_OUTPUT_FILEPATH=" "
18+ CONSUME_IDS_FILEPATH=" $TMP_DIR /consume .ids"
19+ PROBLEMATIC_EVENT_CONSUME_IDS_FILEPATH=" $TMP_DIR /problematic_event_consume .ids"
20+ PROBLEMATIC_METADATA_CONSUME_IDS_FILEPATH=" $TMP_DIR /problematic_metadata_consume .ids"
21+ CONSUME_ATTEMPT_OUTPUT_FILEPATH=" $TMP_DIR /consume_attempt_output .json"
2122DETECT_SAMPLES_WITH_NULL_DP_AD_FIELDS_SCRIPT_FILEPATH=/data/portal-cron/scripts/detect_samples_with_null_dp_ad_fields.py
2223DETECT_SAMPLES_WITH_PROBLEMATIC_METADATA_SCRIPT_FILEPATH=/data/portal-cron/scripts/detect_samples_with_problematic_metadata.py
2324CVR_MONITOR_SLACK_URI_FILE=" /data/portal-cron/pipelines-credentials/cvr-monitor-webhook-uri"
@@ -29,6 +30,9 @@ function make_tmp_dir_if_necessary() {
2930 echo " Error : could not create tmp directory '$TMP_DIR '" >&2
3031 exit 1
3132 fi
33+ else
34+ # Remove files from last fetch
35+ rm $TMP_DIR /*
3236 fi
3337}
3438
@@ -57,6 +61,7 @@ function set_cvr_fetch_url_prefix() {
5761}
5862
# Download the currently queued CVR samples for this fetch pass.
# Globals read:    TMP_DIR, FETCH_NUM, CVR_CREATE_SESSION_URL, CVR_FETCH_URL_PREFIX
# Globals written: FETCH_OUTPUT_FILEPATH (set to "$TMP_DIR/cvr_data_${FETCH_NUM}.json"),
#                  dmp_token (left global, exactly as before)
# Outputs:         the response body of the fetch request is written to
#                  FETCH_OUTPUT_FILEPATH
function fetch_currently_queued_samples() {
    # One output file per pass so earlier passes are not overwritten.
    FETCH_OUTPUT_FILEPATH="${TMP_DIR}/cvr_data_${FETCH_NUM}.json"
    # The session token is the last double-quoted value on the line of the
    # create-session response that mentions session_id.
    dmp_token=$(curl "${CVR_CREATE_SESSION_URL}" \
        | grep session_id \
        | sed -E 's/",[[:space:]]*$//' \
        | sed -E 's/.*"//')
    # Quote the redirect target: an unquoted path containing whitespace makes
    # bash abort the command with "ambiguous redirect".
    curl "${CVR_FETCH_URL_PREFIX}/${dmp_token}/0" > "${FETCH_OUTPUT_FILEPATH}"
}
@@ -69,10 +74,13 @@ function detect_samples_with_problematic_metadata() {
6974 $DETECT_SAMPLES_WITH_PROBLEMATIC_METADATA_SCRIPT_FILEPATH ${FETCH_OUTPUT_FILEPATH} ${PROBLEMATIC_METADATA_CONSUME_IDS_FILEPATH}
7075}
7176
# Report whether the last detection pass flagged any samples.
# Globals read: PROBLEMATIC_EVENT_CONSUME_IDS_FILEPATH,
#               PROBLEMATIC_METADATA_CONSUME_IDS_FILEPATH
# Outputs:      one status line on stdout
# Returns:      0 when at least one of the two id files exists and is
#               non-empty, 1 otherwise
function problems_were_detected() {
    # Quoted on purpose: with an empty/unset variable an unquoted
    # `[ ! -s $VAR ]` degenerates to `[ ! -s ]`, which is false, and the
    # function would claim that problems exist.
    if [[ -s "${PROBLEMATIC_EVENT_CONSUME_IDS_FILEPATH}" \
        || -s "${PROBLEMATIC_METADATA_CONSUME_IDS_FILEPATH}" ]] ; then
        echo "problematic samples were detected"
        return 0
    fi
    echo "no problematic samples detected"
    return 1
}
7886
# Ask CVR to consume one sample and, optionally, record the outcome.
# Arguments:
#   $1 - session token placed in the consume URL
#   $2 - sample id to consume
#   $3 - type of problem: pass 'e' for event problems and 'm' for metadata problems
#   $4 - the literal string "true" to record the outcome through
#        register_successful_consumption / register_failed_consumption;
#        any other value performs the request without recording anything
# Globals read:    CONSUME_ATTEMPT_OUTPUT_FILEPATH, CVR_CONSUME_SAMPLE_URL_PREFIX
# Globals written: dmp_token, sample_id, type_of_problem, register_attempt,
#                  HTTP_STATUS (all left global, as before, because other
#                  parts of the script may read them)
# A consume counts as successful only when the HTTP status starts with 2,
# the response has no '"error": "' entry and it reports 'affectedRows": 1'.
function attempt_to_consume_problematic_sample() {
    dmp_token="$1"
    sample_id="$2"
    type_of_problem="$3"
    register_attempt="$4"
    HTTP_STATUS=$(curl -sSL -w '%{http_code}' -o "$CONSUME_ATTEMPT_OUTPUT_FILEPATH" "${CVR_CONSUME_SAMPLE_URL_PREFIX}/${dmp_token}/${sample_id}")

    local consumed=false
    # The first grep is deliberately not silent: a matching error line is
    # echoed to stdout, as it was before.
    if [[ "$HTTP_STATUS" =~ ^2 ]] \
        && ! grep '"error": "' "$CONSUME_ATTEMPT_OUTPUT_FILEPATH" \
        && grep --silent 'affectedRows": 1' "$CONSUME_ATTEMPT_OUTPUT_FILEPATH" ; then
        consumed=true
    fi

    if [[ "$register_attempt" != true ]] ; then
        return 0
    fi
    # Exactly one outcome is recorded. The previous code relied on `continue`
    # to skip the failure registration, but this function contains no loop:
    # newer bash versions do not let `continue` act on the caller's loop, so
    # a successful consume was also recorded as a failure.
    if [[ "$consumed" == true ]] ; then
        register_successful_consumption "${sample_id}" "$type_of_problem"
    else
        register_failed_consumption "${sample_id}" "$type_of_problem"
    fi
}
122135
# Try to consume every sample listed in the two problematic-id files.
# Arguments:
#   $1 - optional; forwarded as the fourth argument of
#        attempt_to_consume_problematic_sample. Defaults to "true".
# Globals read:    CVR_CREATE_SESSION_URL, PROBLEMATIC_EVENT_CONSUME_IDS_FILEPATH,
#                  PROBLEMATIC_METADATA_CONSUME_IDS_FILEPATH
# Globals written: register_attempt, dmp_token, sample_id (left global, as before)
# Event ids are submitted with problem type "e", metadata ids with "m".
function attempt_to_consume_problematic_samples() {
    register_attempt=${1:-true}
    # The session token is the last double-quoted value on the line of the
    # create-session response that mentions session_id.
    dmp_token=$(curl "${CVR_CREATE_SESSION_URL}" \
        | grep session_id \
        | sed -E 's/",[[:space:]]*$//' \
        | sed -E 's/.*"//')

    # `read -r` keeps backslashes literal; `|| [[ -n ... ]]` still processes a
    # final line that lacks a trailing newline; blank lines are skipped so no
    # request is sent with an empty sample id.
    while read -r sample_id || [[ -n "$sample_id" ]] ; do
        [[ -n "$sample_id" ]] || continue
        attempt_to_consume_problematic_sample "$dmp_token" "$sample_id" "e" "$register_attempt"
    done < "${PROBLEMATIC_EVENT_CONSUME_IDS_FILEPATH}"

    while read -r sample_id || [[ -n "$sample_id" ]] ; do
        [[ -n "$sample_id" ]] || continue
        attempt_to_consume_problematic_sample "$dmp_token" "$sample_id" "m" "$register_attempt"
    done < "${PROBLEMATIC_METADATA_CONSUME_IDS_FILEPATH}"
}
132146
# Consume the samples that are hard-coded per cohort (temporary measure).
# Globals read: COHORT, PROBLEMATIC_EVENT_CONSUME_IDS_FILEPATH,
#               PROBLEMATIC_METADATA_CONSUME_IDS_FILEPATH
# Side effects: both id files are reset to empty; for cohort "mskimpact" the
#               metadata id file receives the hard-coded sample id, and the
#               listed samples are submitted for consumption.
function consume_hardcoded_samples() {
    # Start from empty id files (equivalent to removing and re-creating them).
    : > "${PROBLEMATIC_EVENT_CONSUME_IDS_FILEPATH}"
    : > "${PROBLEMATIC_METADATA_CONSUME_IDS_FILEPATH}"

    if [[ "$COHORT" == "mskimpact" ]] ; then
        echo "P-0025907-N01-IM6" >> "${PROBLEMATIC_METADATA_CONSUME_IDS_FILEPATH}"
    fi

    # -s rather than -f: the file always exists at this point, so an
    # existence test can never skip the call; only attempt consumption
    # when an id was actually written.
    if [[ -s "${PROBLEMATIC_METADATA_CONSUME_IDS_FILEPATH}" ]] ; then
        # Won't register attempt (so it doesn't show up in logs every night)
        attempt_to_consume_problematic_samples false
    fi
}
145159
# Decide whether there is anything to report.
# Globals read: succeeded_to_consume_problematic_events_sample_list,
#               failed_to_consume_problematic_events_sample_list,
#               succeeded_to_consume_problematic_metadata_sample_list,
#               failed_to_consume_problematic_metadata_sample_list
# Returns:      0 when at least one of the four arrays has an element,
#               1 when all of them are empty
function need_to_log_actions() {
    local recorded_count=0
    recorded_count=$(( recorded_count + ${#succeeded_to_consume_problematic_events_sample_list[@]} ))
    recorded_count=$(( recorded_count + ${#failed_to_consume_problematic_events_sample_list[@]} ))
    recorded_count=$(( recorded_count + ${#succeeded_to_consume_problematic_metadata_sample_list[@]} ))
    recorded_count=$(( recorded_count + ${#failed_to_consume_problematic_metadata_sample_list[@]} ))
    if (( recorded_count > 0 )) ; then
        return 0
    fi
    return 1
}
169+
146170function log_actions() {
147171 date
148172 echo -e " ${COHORT^^} Problematic Samples"
@@ -158,7 +182,7 @@ function post_slack_message() {
158182 if [ ${# failed_to_consume_problematic_events_sample_list[@]} -gt 0 ]; then
159183 MESSAGE=" ${MESSAGE} Attempted Unsuccessfully To Consume :\n${failed_to_consume_problematic_events_sample_list[*]} "
160184 fi
161- MESSAGE=" ${MESSAGE} Warning : the following samples have been preemptively consumed before fetch because they contained problematic metadata where the gene-panel property was unset or had value UNKNOWN.\nSuccessfully Consumed :\n${succeeded_to_consume_problematic_metadata_sample_list[*]} "
185+ MESSAGE=" ${MESSAGE} Warning : the following samples have been preemptively consumed before fetch because they contained problematic metadata where the gene-panel property was unset, invalid, or had value UNKNOWN.\nSuccessfully Consumed :\n${succeeded_to_consume_problematic_metadata_sample_list[*]} "
162186 if [ ${# failed_to_consume_problematic_metadata_sample_list[@]} -gt 0 ]; then
163187 MESSAGE=" ${MESSAGE} Attempted Unsuccessfully To Consume :\n${failed_to_consume_problematic_metadata_sample_list[*]} "
164188 fi
@@ -168,20 +192,24 @@ function post_slack_message() {
# Main flow: repeatedly fetch the queue, detect problematic samples and try
# to consume them, until a fetch comes back with nothing problematic. Actions
# are logged and posted to Slack only when something was recorded.
date
check_args
make_tmp_dir_if_necessary
set_cvr_fetch_url_prefix
failed_to_consume_problematic_events_sample_list=()
succeeded_to_consume_problematic_events_sample_list=()
failed_to_consume_problematic_metadata_sample_list=()
succeeded_to_consume_problematic_metadata_sample_list=()
# Upper bound on fetch/consume passes. A sample that is detected but cannot be
# consumed shows up again in the next fetch, so an unbounded loop would never
# terminate. Override through the environment if more passes are needed.
MAX_FETCH_PASSES=${MAX_FETCH_PASSES:-20}
while : ; do
    consume_hardcoded_samples # temporary code
    fetch_currently_queued_samples
    detect_samples_with_problematic_events
    detect_samples_with_problematic_metadata
    if ! problems_were_detected ; then
        break
    fi
    attempt_to_consume_problematic_samples
    if (( ${FETCH_NUM:-1} >= MAX_FETCH_PASSES )) ; then
        echo "Warning: problematic samples still detected after ${MAX_FETCH_PASSES} fetch passes, giving up" >&2
        break
    fi
    # FETCH_NUM selects the output file name of the next fetch.
    FETCH_NUM=$(( ${FETCH_NUM:-1} + 1 ))
done
if need_to_log_actions ; then
    log_actions
    post_slack_message
fi
0 commit comments