Skip to content

Commit b881b5b

Browse files
committed
Linux Serial Command Line Runs Fixed
1 parent a0120e4 commit b881b5b

7 files changed

Lines changed: 14 additions & 15 deletions

File tree

streamline/ApplyModelMain.py

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,7 @@
1919
import sys
2020
import pandas as pd
2121
import FeatureSelectionJob
22+
import ApplyModelJob
2223
import time
2324
import csv
2425
import glob
@@ -62,7 +63,6 @@ def main(argv):
6263
categorical_cutoff = metadata['Categorical Cutoff']
6364
sig_cutoff = metadata['Statistical Significance Cutoff']
6465
cv_partitions = metadata['CV Partitions']
65-
random_state = metadata['Random Seed']
6666
scale_data = metadata['Use Data Scaling']
6767
impute_data = metadata['Use Data Imputation']
6868
multi_impute = metadata['Use Multivariate Imputation']
@@ -110,11 +110,11 @@ def main(argv):
110110
if file_extension == 'txt' or file_extension == 'csv':
111111
if apply_name not in unique_datanames:
112112
unique_datanames.append(apply_name)
113+
job_counter += 1
113114
if eval(options.run_parallel):
114-
job_counter += 1
115-
submitClusterJob(options.reserved_memory,options.maximum_memory,options.queue,experiment_path,datasetFilename,full_path,class_label,instance_label,categorical_cutoff,sig_cutoff,cv_partitions,scale_data,impute_data,primary_metric,options.dataset_for_rep,options.match_label,options.plot_ROC,options.plot_PRC,options.plot_metric_boxplots,options.export_feature_correlations,jupyterRun,multi_impute,random_state)
115+
submitClusterJob(options.reserved_memory,options.maximum_memory,options.queue,experiment_path,datasetFilename,full_path,class_label,instance_label,categorical_cutoff,sig_cutoff,cv_partitions,scale_data,impute_data,primary_metric,options.dataset_for_rep,options.match_label,options.plot_ROC,options.plot_PRC,options.plot_metric_boxplots,options.export_feature_correlations,jupyterRun,multi_impute)
116116
else:
117-
submitLocalJob(datasetFilename,full_path,class_label,instance_label,categorical_cutoff,sig_cutoff,cv_partitions,scale_data,impute_data,primary_metric,options.dataset_for_rep,options.match_label,options.plot_ROC,options.plot_PRC,options.plot_metric_boxplots,options.export_feature_correlations,jupyterRun,multi_impute,random_state)
117+
submitLocalJob(datasetFilename,full_path,class_label,instance_label,categorical_cutoff,sig_cutoff,cv_partitions,scale_data,impute_data,primary_metric,options.dataset_for_rep,options.match_label,options.plot_ROC,options.plot_PRC,options.plot_metric_boxplots,options.export_feature_correlations,jupyterRun,multi_impute)
118118
file_count += 1
119119

120120
if file_count == 0: #Check that there was at least 1 dataset
@@ -138,11 +138,11 @@ def main(argv):
138138
if not options.do_check:
139139
print(str(job_counter)+ " jobs submitted in Phase 9")
140140

141-
def submitLocalJob(datasetFilename,full_path,class_label,instance_label,categorical_cutoff,sig_cutoff,cv_partitions,scale_data,impute_data,primary_metric,dataset_for_rep,match_label,plot_ROC,plot_PRC,plot_metric_boxplots,export_feature_correlations,jupyterRun,multi_impute,random_state):
141+
def submitLocalJob(datasetFilename,full_path,class_label,instance_label,categorical_cutoff,sig_cutoff,cv_partitions,scale_data,impute_data,primary_metric,dataset_for_rep,match_label,plot_ROC,plot_PRC,plot_metric_boxplots,export_feature_correlations,jupyterRun,multi_impute):
142142
""" Runs ApplyModelJob.py on each dataset in dataset_path locally. These runs will be completed serially rather than in parallel. """
143-
ApplyModelJob.job(datasetFilename,full_path,class_label,instance_label,categorical_cutoff,sig_cutoff,cv_partitions,scale_data,impute_data,primary_metric,dataset_for_rep,match_label,plot_ROC,plot_PRC,plot_metric_boxplots,export_feature_correlations,jupyterRun,multi_impute,random_state)
143+
ApplyModelJob.job(datasetFilename,full_path,class_label,instance_label,categorical_cutoff,sig_cutoff,cv_partitions,scale_data,impute_data,primary_metric,dataset_for_rep,match_label,plot_ROC,plot_PRC,plot_metric_boxplots,export_feature_correlations,jupyterRun,multi_impute)
144144

145-
def submitClusterJob(reserved_memory,maximum_memory,queue,experiment_path,datasetFilename,full_path,class_label,instance_label,categorical_cutoff,sig_cutoff,cv_partitions,scale_data,impute_data,primary_metric,dataset_for_rep,match_label,plot_ROC,plot_PRC,plot_metric_boxplots,export_feature_correlations,jupyterRun,multi_impute,random_state):
145+
def submitClusterJob(reserved_memory,maximum_memory,queue,experiment_path,datasetFilename,full_path,class_label,instance_label,categorical_cutoff,sig_cutoff,cv_partitions,scale_data,impute_data,primary_metric,dataset_for_rep,match_label,plot_ROC,plot_PRC,plot_metric_boxplots,export_feature_correlations,jupyterRun,multi_impute):
146146
""" Runs ApplyModelJob.py on each dataset in rep_data_path. Runs in parallel on a linux-based computing cluster that uses an IBM Spectrum LSF for job scheduling."""
147147
train_name = full_path.split('/')[-1] #original training data name
148148
apply_name = datasetFilename.split('/')[-1].split('.')[0]

streamline/DataCompareMain.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,9 +49,8 @@ def main(argv):
4949
#Load variables specified earlier in the pipeline from metadata
5050
sig_cutoff = metadata['Statistical Significance Cutoff']
5151
jupyterRun = metadata['Run From Jupyter Notebook']
52-
52+
job_counter += 1
5353
if eval(options.run_parallel):
54-
job_counter += 1
5554
submitClusterJob(options.output_path+'/'+options.experiment_name,options.reserved_memory,options.maximum_memory,options.queue,sig_cutoff,jupyterRun)
5655
else:
5756
submitLocalJob(options.output_path+'/'+options.experiment_name,sig_cutoff,jupyterRun)

streamline/FeatureImportanceMain.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,8 +87,8 @@ def main(argv):
8787
os.mkdir(full_path+"/feature_selection/mutualinformation")
8888
for cv_train_path in glob.glob(full_path+"/CVDatasets/*_CV_*Train.csv"):
8989
command_text = '/FeatureImportanceJob.py ' + cv_train_path+" "+experiment_path+" "+str(random_state)+" "+class_label+" "+instance_label+" " +str(options.instance_subset)+" mi "+str(options.n_jobs)+' '+str(options.use_TURF)+' '+str(options.TURF_pct)
90+
job_counter += 1
9091
if eval(options.run_parallel):
91-
job_counter += 1
9292
submitClusterJob(command_text, experiment_path,options.reserved_memory,options.maximum_memory,options.queue,jupyterRun)
9393
else:
9494
submitLocalJob(cv_train_path,experiment_path,random_state,class_label,instance_label,options.instance_subset,'mi',options.n_jobs,options.use_TURF,options.TURF_pct,jupyterRun)

streamline/FeatureSelectionMain.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,8 +76,8 @@ def main(argv):
7676

7777
for dataset_directory_path in dataset_paths:
7878
full_path = options.output_path + "/" + options.experiment_name + "/" + dataset_directory_path
79+
job_counter += 1
7980
if eval(options.run_parallel):
80-
job_counter += 1
8181
submitClusterJob(full_path,options.output_path+'/'+options.experiment_name,do_mutual_info,do_multisurf,options.max_features_to_keep,options.filter_poor_features,options.top_features,options.export_scores,class_label,instance_label,cv_partitions,options.overwrite_cv,options.reserved_memory,options.maximum_memory,options.queue,jupyterRun)
8282
else:
8383
submitLocalJob(full_path,do_mutual_info,do_multisurf,options.max_features_to_keep,options.filter_poor_features,options.top_features,options.export_scores,class_label,instance_label,cv_partitions,options.overwrite_cv,jupyterRun)

streamline/ModelMain.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -197,8 +197,8 @@ def main(argv):
197197
for algorithm in algorithms:
198198
algAbrev = algInfo[algorithm][1]
199199
algNoSpace = algorithm.replace(" ", "_")
200+
job_counter += 1
200201
if eval(options.run_parallel):
201-
job_counter += 1
202202
submitClusterJob(algNoSpace,train_file_path,test_file_path,full_path,options.n_trials,options.timeout,options.lcs_timeout,options.export_hyper_sweep_plots,instance_label,class_label,random_state,options.output_path+'/'+options.experiment_name,cvCount,filter_poor_features,options.reserved_memory,options.maximum_memory,options.do_lcs_sweep,options.nu,options.iterations,options.N,options.training_subsample,options.queue,options.use_uniform_FI,options.primary_metric,algAbrev,jupyterRun)
203203
else:
204204
submitLocalJob(algNoSpace,train_file_path,test_file_path,full_path,options.n_trials,options.timeout,options.lcs_timeout,options.export_hyper_sweep_plots,instance_label,class_label,random_state,cvCount,filter_poor_features,options.do_lcs_sweep,options.nu,options.iterations,options.N,options.training_subsample,options.use_uniform_FI,options.primary_metric,algAbrev,jupyterRun)

streamline/PDF_ReportMain.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import argparse
2121
import time
2222
import glob
23+
import PDF_ReportJob
2324

2425
def main(argv):
2526
#Parse arguments
@@ -50,8 +51,8 @@ def main(argv):
5051
raise Exception('Replication and Dataset paths must be specified as arguments to generate PDF summary on new data analysis!')
5152

5253
if not options.do_check: #Run job submission
54+
job_counter += 1
5355
if eval(options.run_parallel):
54-
job_counter += 1
5556
submitClusterJob(experiment_path,options.training,options.rep_data_path,options.dataset_for_rep,options.reserved_memory,options.maximum_memory,options.queue)
5657
else:
5758
submitLocalJob(experiment_path,options.training,options.rep_data_path,options.dataset_for_rep)

streamline/StatsMain.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,9 +86,8 @@ def main(argv):
8686
os.mkdir(full_path+'/model_evaluation/DT_Viz')
8787
if eval(do_GP) and not os.path.exists(full_path+'/model_evaluation/GP_Viz'):
8888
os.mkdir(full_path+'/model_evaluation/GP_Viz')
89-
89+
job_counter += 1
9090
if eval(options.run_parallel):
91-
job_counter += 1
9291
submitClusterJob(full_path,options.plot_ROC,options.plot_PRC,options.plot_FI_box,class_label,instance_label,options.output_path+'/'+options.experiment_name,cv_partitions,scale_data,options.reserved_memory,options.maximum_memory,options.queue,options.plot_metric_boxplots,primary_metric,options.top_model_features,sig_cutoff,options.metric_weight,jupyterRun)
9392
else:
9493
submitLocalJob(full_path,options.plot_ROC,options.plot_PRC,options.plot_FI_box,class_label,instance_label,cv_partitions,scale_data,options.plot_metric_boxplots,primary_metric,options.top_model_features,sig_cutoff,options.metric_weight,jupyterRun)

0 commit comments

Comments
 (0)