1919import sys
2020import pandas as pd
2121import FeatureSelectionJob
22+ import ApplyModelJob
2223import time
2324import csv
2425import glob
@@ -62,7 +63,6 @@ def main(argv):
6263 categorical_cutoff = metadata ['Categorical Cutoff' ]
6364 sig_cutoff = metadata ['Statistical Significance Cutoff' ]
6465 cv_partitions = metadata ['CV Partitions' ]
65- random_state = metadata ['Random Seed' ]
6666 scale_data = metadata ['Use Data Scaling' ]
6767 impute_data = metadata ['Use Data Imputation' ]
6868 multi_impute = metadata ['Use Multivariate Imputation' ]
@@ -110,11 +110,11 @@ def main(argv):
110110 if file_extension == 'txt' or file_extension == 'csv' :
111111 if apply_name not in unique_datanames :
112112 unique_datanames .append (apply_name )
113+ job_counter += 1
113114 if eval (options .run_parallel ):
114- job_counter += 1
115- submitClusterJob (options .reserved_memory ,options .maximum_memory ,options .queue ,experiment_path ,datasetFilename ,full_path ,class_label ,instance_label ,categorical_cutoff ,sig_cutoff ,cv_partitions ,scale_data ,impute_data ,primary_metric ,options .dataset_for_rep ,options .match_label ,options .plot_ROC ,options .plot_PRC ,options .plot_metric_boxplots ,options .export_feature_correlations ,jupyterRun ,multi_impute ,random_state )
115+ submitClusterJob (options .reserved_memory ,options .maximum_memory ,options .queue ,experiment_path ,datasetFilename ,full_path ,class_label ,instance_label ,categorical_cutoff ,sig_cutoff ,cv_partitions ,scale_data ,impute_data ,primary_metric ,options .dataset_for_rep ,options .match_label ,options .plot_ROC ,options .plot_PRC ,options .plot_metric_boxplots ,options .export_feature_correlations ,jupyterRun ,multi_impute )
116116 else :
117- submitLocalJob (datasetFilename ,full_path ,class_label ,instance_label ,categorical_cutoff ,sig_cutoff ,cv_partitions ,scale_data ,impute_data ,primary_metric ,options .dataset_for_rep ,options .match_label ,options .plot_ROC ,options .plot_PRC ,options .plot_metric_boxplots ,options .export_feature_correlations ,jupyterRun ,multi_impute , random_state )
117+ submitLocalJob (datasetFilename ,full_path ,class_label ,instance_label ,categorical_cutoff ,sig_cutoff ,cv_partitions ,scale_data ,impute_data ,primary_metric ,options .dataset_for_rep ,options .match_label ,options .plot_ROC ,options .plot_PRC ,options .plot_metric_boxplots ,options .export_feature_correlations ,jupyterRun ,multi_impute )
118118 file_count += 1
119119
120120 if file_count == 0 : #Check that there was at least 1 dataset
@@ -138,11 +138,11 @@ def main(argv):
138138 if not options .do_check :
139139 print (str (job_counter )+ " jobs submitted in Phase 9" )
140140
141- def submitLocalJob (datasetFilename ,full_path ,class_label ,instance_label ,categorical_cutoff ,sig_cutoff ,cv_partitions ,scale_data ,impute_data ,primary_metric ,dataset_for_rep ,match_label ,plot_ROC ,plot_PRC ,plot_metric_boxplots ,export_feature_correlations ,jupyterRun ,multi_impute , random_state ):
141+ def submitLocalJob (datasetFilename ,full_path ,class_label ,instance_label ,categorical_cutoff ,sig_cutoff ,cv_partitions ,scale_data ,impute_data ,primary_metric ,dataset_for_rep ,match_label ,plot_ROC ,plot_PRC ,plot_metric_boxplots ,export_feature_correlations ,jupyterRun ,multi_impute ):
142142 """ Runs ApplyModelJob.py on each dataset in dataset_path locally. These runs will be completed serially rather than in parallel. """
143- ApplyModelJob .job (datasetFilename ,full_path ,class_label ,instance_label ,categorical_cutoff ,sig_cutoff ,cv_partitions ,scale_data ,impute_data ,primary_metric ,dataset_for_rep ,match_label ,plot_ROC ,plot_PRC ,plot_metric_boxplots ,export_feature_correlations ,jupyterRun ,multi_impute , random_state )
143+ ApplyModelJob .job (datasetFilename ,full_path ,class_label ,instance_label ,categorical_cutoff ,sig_cutoff ,cv_partitions ,scale_data ,impute_data ,primary_metric ,dataset_for_rep ,match_label ,plot_ROC ,plot_PRC ,plot_metric_boxplots ,export_feature_correlations ,jupyterRun ,multi_impute )
144144
145- def submitClusterJob (reserved_memory ,maximum_memory ,queue ,experiment_path ,datasetFilename ,full_path ,class_label ,instance_label ,categorical_cutoff ,sig_cutoff ,cv_partitions ,scale_data ,impute_data ,primary_metric ,dataset_for_rep ,match_label ,plot_ROC ,plot_PRC ,plot_metric_boxplots ,export_feature_correlations ,jupyterRun ,multi_impute , random_state ):
145+ def submitClusterJob (reserved_memory ,maximum_memory ,queue ,experiment_path ,datasetFilename ,full_path ,class_label ,instance_label ,categorical_cutoff ,sig_cutoff ,cv_partitions ,scale_data ,impute_data ,primary_metric ,dataset_for_rep ,match_label ,plot_ROC ,plot_PRC ,plot_metric_boxplots ,export_feature_correlations ,jupyterRun ,multi_impute ):
146146 """ Runs ApplyModelJob.py on each dataset in rep_data_path. Runs in parallel on a linux-based computing cluster that uses an IBM Spectrum LSF for job scheduling."""
147147 train_name = full_path .split ('/' )[- 1 ] #original training data name
148148 apply_name = datasetFilename .split ('/' )[- 1 ].split ('.' )[0 ]
0 commit comments