Skip to content

Commit f384aa6

Browse files
committed
CNAM-154 Refactored MLPPMain class and added MLPPConfig
CNAM-154 Refactored global testing and added MLPPMainSuite CNAM-154 Small fixes
1 parent 67ed5da commit f384aa6

19 files changed

Lines changed: 295 additions & 147 deletions
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# Overrides for the "cmap" environment. filtering-default.conf includes a file named
# "filtering-cmap.conf" inside its `cmap = { ... }` object, after `cmap = ${default}`
# (presumably this file -- confirm the file name).
env_name = "cmap"
2+
3+
# Reference date and study period used for the cmap environment.
dates = {
4+
age_reference = [2010, 12, 31, 23, 59, 59]
5+
study_start = [2010, 1, 1]
6+
study_end = [2014, 12, 31, 23, 59, 59]
7+
}
8+
9+
# Bind the MLPP time window to the dates declared above. The default file binds the same
# keys to `${default.dates...}`; NOTE(review): presumably those absolute references would
# keep pointing at the default dates, which is why they are re-declared here -- confirm.
mlpp_parameters = {
10+
min_timestamp = ${dates.study_start}
11+
max_timestamp = ${dates.study_end}
12+
}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# The "cnam" environment only sets its name; filtering-default.conf includes
# "filtering-cnam.conf" inside `cnam = { ... }`, after `cnam = ${default}`
# (presumably this file -- confirm the file name).
env_name = "cnam"

src/main/resources/filtering-default.conf renamed to src/main/resources/config/filtering-default.conf

Lines changed: 29 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ default = {
1616
}
1717
dates = {
1818
age_reference = [2006, 12, 31, 23, 59, 59]
19+
study_start = [2006, 1, 1]
20+
study_end = [2009, 12, 31, 23, 59, 59]
1921
}
2022
paths = {
2123
input = {
@@ -32,38 +34,41 @@ default = {
3234
root = "/shared/filtered_data"
3335
patients = "/shared/filtered_data/patients"
3436
flat_events = "/shared/filtered_data/flat_events"
37+
mlpp_features = "/shared/features/mlpp"
38+
}
39+
}
40+
cox = {
41+
42+
}
43+
mlpp_parameters = {
44+
bucket_size = 30 # in days
45+
lag_count = 10
46+
min_timestamp = ${default.dates.study_start}
47+
max_timestamp = ${default.dates.study_end}
48+
49+
exposures = {
50+
min_purchases = 1
51+
start_delay = 0
52+
purchases_window = 0
53+
only_first = false
54+
filter_diagnosed_patients = true
55+
filter_delayed_entries = true
56+
delayed_entry_threshold = 12
3557
}
3658
}
3759
}
3860

39-
# For the CNAM environment, we use the default values
4061
cnam = ${default}
41-
cnam.env_name = "cnam"
62+
cnam = {
63+
include "filtering-cnam.conf"
64+
}
4265

43-
# Overriding only the dates for cmap environment:
4466
cmap = ${default}
45-
cmap.env_name = "cmap"
46-
cmap.dates = {
47-
age_reference = [2010, 12, 31, 23, 59, 59]
67+
cmap = {
68+
include "filtering-cmap.conf"
4869
}
4970

50-
# Overriding only the paths for test environment:
5171
test = ${default}
52-
test.env_name = "test"
53-
test.paths = {
54-
input = {
55-
dcir = "src/test/resources/test-input/DCIR.parquet"
56-
pmsi_mco = "src/test/resources/test-input/MCO.parquet"
57-
pmsi_had = "src/test/resources/test-input/HAD.parquet"
58-
pmsi_ssr = "src/test/resources/test-input/SSR.parquet"
59-
ir_ben = "src/test/resources/test-input/IR_BEN_R.parquet"
60-
ir_imb = "src/test/resources/test-input/IR_IMB_R.parquet"
61-
ir_pha = "src/test/resources/test-input/IR_PHA_R.parquet"
62-
dosages = "src/test/resources/test-input/DOSE_PER_MOLECULE.CSV"
63-
}
64-
output = {
65-
root = "target/test/output"
66-
patients = "target/test/output/patients"
67-
flat_events = "target/test/output/flat_events"
68-
}
72+
test = {
73+
include "filtering-test.conf"
6974
}
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# Overrides for the "test" environment: inputs are read from src/test/resources/test-input
# and outputs are written under target/test/output.
env_name = "test"
2+
3+
paths = {
4+
# Input tables, one path per source.
input = {
5+
dcir = "src/test/resources/test-input/DCIR.parquet"
6+
pmsi_mco = "src/test/resources/test-input/MCO.parquet"
7+
pmsi_had = "src/test/resources/test-input/HAD.parquet"
8+
pmsi_ssr = "src/test/resources/test-input/SSR.parquet"
9+
ir_ben = "src/test/resources/test-input/IR_BEN_R.parquet"
10+
ir_imb = "src/test/resources/test-input/IR_IMB_R.parquet"
11+
ir_pha = "src/test/resources/test-input/IR_PHA_R.parquet"
12+
dosages = "src/test/resources/test-input/DOSE_PER_MOLECULE.CSV"
13+
}
14+
# Output locations; the four keys match the fields of FilteringConfig.OutputPaths.
output = {
15+
root = "target/test/output"
16+
patients = "target/test/output/patients"
17+
flat_events = "target/test/output/flat_events"
18+
mlpp_features = "target/test/output/mlpp_features"
19+
}
20+
}

src/main/scala/fr/polytechnique/cmap/cnam/filtering/ExposuresTransformer.scala

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,7 @@ import fr.polytechnique.cmap.cnam.utilities.functions._
55

66
/**
 * Contract for exposure transformers: a DatasetTransformer that maps a
 * Dataset[FlatEvent] to a Dataset[FlatEvent].
 */
trait ExposuresTransformer extends DatasetTransformer[FlatEvent, FlatEvent] {
77

8-
// Constant definitions. Should be verified before compiling.
9-
// In the future, we may want to export them to an external file.
10-
val StudyStart = makeTS(2006, 1, 1)
8+
// Start of the study period, taken from FilteringConfig.dates.studyStart instead of a
// hard-coded date. Declared lazy, so the configuration is only read on first use.
lazy val StudyStart = FilteringConfig.dates.studyStart
119

1210
def transform(input: Dataset[FlatEvent]): Dataset[FlatEvent]
1311
}

src/main/scala/fr/polytechnique/cmap/cnam/filtering/FilteringConfig.scala

Lines changed: 29 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,8 @@ object FilteringConfig {
4343
val configPath: String = sqlContext.getConf("conf", "")
4444
val environment: String = sqlContext.getConf("env", "test")
4545

46-
val defaultConfig = ConfigFactory.parseResources("filtering-default.conf").resolve().getConfig(environment)
47-
val newConfig = ConfigFactory.parseFile(new java.io.File(configPath))
46+
val defaultConfig = ConfigFactory.parseResources("config/filtering-default.conf").resolve().getConfig(environment)
47+
val newConfig = ConfigFactory.parseFile(new java.io.File(configPath)).resolve()
4848

4949
newConfig.withFallback(defaultConfig).resolve()
5050
}
@@ -60,7 +60,12 @@ object FilteringConfig {
6060
dosages: String
6161
)
6262

63-
case class OutputPaths(root: String, patients: String, flatEvents: String)
63+
/**
 * Output locations read from the configuration.
 *
 * The fields are populated from the keys `paths.output.root`, `paths.output.patients`,
 * `paths.output.flat_events` and `paths.output.mlpp_features` respectively.
 */
case class OutputPaths(
64+
root: String,
65+
patients: String,
66+
flatEvents: String,
67+
mlppFeatures: String
68+
)
6469

6570
case class Limits(
6671
minYear: Int,
@@ -73,7 +78,16 @@ object FilteringConfig {
7378
maxAge: Int
7479
)
7580

76-
case class Dates(ageReference: Timestamp)
81+
/**
 * Dates read from the configuration.
 *
 * Each field is built with `makeTS` from the integer list stored under `dates.age_reference`,
 * `dates.study_start` and `dates.study_end` respectively.
 */
case class Dates(
82+
ageReference: Timestamp,
83+
studyStart: Timestamp,
84+
studyEnd: Timestamp
85+
)
86+
87+
/**
 * Trackloss settings read from the configuration keys `trackloss.threshold` and
 * `trackloss.delay`.
 *
 * NOTE(review): the units of both values are not visible here -- confirm against the
 * configuration file and the code that consumes them.
 */
case class TracklossDefinition(
88+
threshold: Int,
89+
delay: Int
90+
)
7791

7892
lazy val drugCategories: List[String] = conf.getStringList("drug_categories").asScala.toList
7993
lazy val cancerDefinition: String = conf.getString("cancer_definition")
@@ -92,7 +106,8 @@ object FilteringConfig {
92106
lazy val outputPaths = OutputPaths(
93107
root = conf.getString("paths.output.root"),
94108
patients = conf.getString("paths.output.patients"),
95-
flatEvents = conf.getString("paths.output.flat_events")
109+
flatEvents = conf.getString("paths.output.flat_events"),
110+
mlppFeatures = conf.getString("paths.output.mlpp_features")
96111
)
97112
lazy val limits = Limits(
98113
minYear = conf.getInt("limits.min_year"),
@@ -105,6 +120,14 @@ object FilteringConfig {
105120
maxAge = conf.getInt("limits.max_age")
106121
)
107122
lazy val dates = Dates(
108-
ageReference = makeTS(conf.getIntList("dates.age_reference").asScala.toList)
123+
ageReference = makeTS(conf.getIntList("dates.age_reference").asScala.toList),
124+
studyStart = makeTS(conf.getIntList("dates.study_start").asScala.toList),
125+
studyEnd = makeTS(conf.getIntList("dates.study_end").asScala.toList)
126+
)
127+
lazy val tracklossDefinition = TracklossDefinition(
128+
threshold = conf.getInt("trackloss.threshold"),
129+
delay = conf.getInt("trackloss.delay")
109130
)
131+
132+
/**
 * Returns the sub-configuration stored under the path `modelName`, delegating to
 * `conf.getConfig`.
 *
 * NOTE(review): presumably called with model section names such as "mlpp_parameters" or
 * "cox" -- confirm against the callers.
 *
 * @param modelName configuration path of the model section to extract
 * @return the Config found at that path
 */
def modelConfig(modelName: String): Config = conf.getConfig(modelName)
110133
}

src/main/scala/fr/polytechnique/cmap/cnam/filtering/PatientsTransformer.scala

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -59,10 +59,10 @@ object PatientsTransformer extends Transformer[Patient] with PatientsTransformer
5959

6060
patients.where(filterPatientsByAge)
6161
.select(
62-
patientID.as("patientID"),
63-
gender.as("gender"),
64-
birthdate.as("birthDate"),
65-
deathDate.as("deathDate")
66-
).as[Patient]
62+
patientID.as("patientID"),
63+
gender.as("gender"),
64+
birthdate.as("birthDate"),
65+
deathDate.as("deathDate")
66+
).as[Patient]
6767
}
6868
}

src/main/scala/fr/polytechnique/cmap/cnam/filtering/Sources.scala

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,3 +16,17 @@ class Sources(
1616
val irImb: Option[DataFrame] = None,
1717
val irPha: Option[DataFrame] = None,
1818
val dosages: Option[DataFrame] = None)
19+
20+
/**
 * Companion of [[Sources]] providing a factory, so instances can be created without `new`
 * and with named arguments, e.g. `Sources(dcir = Some(df))`.
 */
object Sources {
21+
/**
 * Builds a [[Sources]] from optional DataFrames. Every argument defaults to `None` and is
 * forwarded to the class constructor in the same order.
 *
 * @return a new Sources instance holding the given DataFrames
 */
def apply(
22+
dcir: Option[DataFrame] = None,
23+
pmsiMco: Option[DataFrame] = None,
24+
pmsiHad: Option[DataFrame] = None,
25+
pmsiSsr: Option[DataFrame] = None,
26+
irBen: Option[DataFrame] = None,
27+
irImb: Option[DataFrame] = None,
28+
irPha: Option[DataFrame] = None,
29+
dosages: Option[DataFrame] = None): Sources = {
30+
new Sources(dcir, pmsiMco, pmsiHad, pmsiSsr, irBen, irImb, irPha, dosages)
31+
}
32+
}

src/main/scala/fr/polytechnique/cmap/cnam/filtering/cox/CoxMain.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ object CoxMain extends Main {
117117
case Array(arg1, args2) => (args(0), args(1), true)
118118
case _ => ("test", "broad", true)
119119
}
120-
val config: Config = ConfigFactory.parseResources("filtering-default.conf").getConfig(environment)
120+
val config: Config = ConfigFactory.parseResources("config/filtering-default.conf").getConfig(environment)
121121
coxFeaturing(sqlContext, config, cancerDefinition, filterDelayedPatients)
122122
stopContext()
123123
}

src/main/scala/fr/polytechnique/cmap/cnam/filtering/ltsccs/LTSCCSMain.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ object LTSCCSMain extends Main {
5757
/**
 * Entry point: starts the context, loads the configuration of the requested environment
 * and runs LTSCCS on it, then stops the context.
 *
 * @param args optional; `args(0)` is the environment name, "test" when absent
 */
override def main(args: Array[String]): Unit = {
5858
startContext()
5959
val environment = if (args.nonEmpty) args(0) else "test"
60-
val config: Config = ConfigFactory.parseResources("filtering-default.conf").getConfig(environment)
60+
// The default file defines each environment through substitutions (e.g. `test = ${default}`),
// so the parsed config is resolved before the environment is extracted, as FilteringConfig does.
val config: Config = ConfigFactory.parseResources("config/filtering-default.conf").resolve().getConfig(environment)
6161
runLTSCCS(sqlContext, config)
6262
stopContext()
6363
}

0 commit comments

Comments
 (0)