@@ -15,6 +15,7 @@ object CoxMain extends Main {
1515
/**
 * Runs the filtering step and then the Cox featuring on the events it produced.
 *
 * @param sqlContext Hive context passed to `FilteringMain.run`.
 * @param argsMap    arguments forwarded unchanged to `FilteringMain.run` and `coxFeaturing`.
 * @return the result of `coxFeaturing` applied to the filtered flat events.
 * @throws NoSuchElementException if `FilteringMain.run` returns `None`.
 */
def run(sqlContext: HiveContext, argsMap: Map[String, String]): Option[Dataset[_]] = {

  logger.info(" Running FilteringMain...")
  // Same exception type as the former `.get`, but with a message that names the failing step
  // instead of the opaque "None.get".
  val flatEvents: Dataset[FlatEvent] = FilteringMain.run(sqlContext, argsMap).getOrElse(
    throw new NoSuchElementException(
      "FilteringMain.run returned no flat events; cannot run Cox featuring"
    )
  )
  coxFeaturing(flatEvents, argsMap)
}
@@ -23,16 +24,15 @@ object CoxMain extends Main {
2324 import flatEvents .sqlContext .implicits ._
2425
2526 val sqlContext = flatEvents.sqlContext
27+ val sc = flatEvents.sqlContext.sparkContext
2628
2729 argsMap.get(" conf" ).foreach(sqlContext.setConf(" conf" , _))
2830 argsMap.get(" env" ).foreach(sqlContext.setConf(" env" , _))
2931
3032 val cancerDefinition : String = FilteringConfig .cancerDefinition
3133 val filterDelayedPatients : Boolean = CoxConfig .filterDelayedPatients
3234 val outputRoot = FilteringConfig .outputPaths.coxFeatures
33- val outputDir = s " $outputRoot/ $cancerDefinition/ $filterDelayedPatients"
34-
35- logger.info(" Running FilteringMain..." )
35+ val outputDir = s " $outputRoot/ $cancerDefinition"
3636
3737 val dcirFlat : DataFrame = sqlContext.read.parquet(FilteringConfig .inputPaths.dcir)
3838
@@ -52,7 +52,9 @@ object CoxMain extends Main {
5252 logger.info(" Caching disease events..." )
5353 logger.info(" Number of disease events: " + diseaseFlatEvents.count)
5454
55- logger.info(" Preparing for Cox" )
55+ logger.info(" Preparing for Cox with the following parameters:" )
56+ logger.info(CoxConfig .summarize.foreach(println))
57+
5658 logger.info(" (Lazy) Transforming Follow-up events..." )
5759 val observationFlatEvents = CoxObservationPeriodTransformer .transform(drugFlatEvents)
5860
@@ -89,8 +91,9 @@ object CoxMain extends Main {
8991 .union(observationFlatEvents)
9092 .union(tracklossFlatEvents)
9193
92- logger.info(" Writing summary of all cox events..." )
94+ logger.info(" Writing summary of all cox events and config ..." )
9395 flatEventsSummary.toDF.write.parquet(s " $outputDir/eventsSummary " )
96+ sc.parallelize(CoxConfig .summarize.toSeq).coalesce(1 ).saveAsTextFile(s " $outputDir/config.txt " )
9497 logger.info(" Writing Exposures..." )
9598 exposures.toDF.write.parquet(s " $outputDir/exposures " )
9699
0 commit comments