@@ -28,6 +28,20 @@ nextflow_process {
2828 }
2929 }
3030
31+ run("GAWK", alias: "GAWK_BIVARIATE_BINARY_PHENO") { // Setup step: runs the GAWK module under this alias to derive a phenotype file with two extra columns whose values are always 1 or 2.
32+ script "../../../gawk/main.nf" // Relative path to the GAWK module definition.
33+ process { // The string below assigns the module inputs: input[0] is [meta, source .phe file], input[1] is the awk program collected into a file, input[2] is false (its meaning is defined by the GAWK module, not visible here).
34+ """
35+ input[0] = [
36+ [ id:'BinaryTrait1__BinaryTrait2' ],
37+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true)
38+ ]
39+ input[1] = Channel.of('FNR == 1 { next } { print \$1, \$2, (NR % 2) + 1, (NR % 3 == 0 ? 2 : 1) }').collectFile(name:'bivariate_binary_phenotype.awk')
40+ input[2] = false
41+ """
42+ } // The awk program skips the first line of the file (FNR == 1) and prints columns 1 and 2 followed by (NR % 2) + 1 and (NR % 3 == 0 ? 2 : 1).
43+ }
44+
3145 run("GAWK", alias: "GAWK_QUANTITATIVE_COVARIATES") {
3246 script "../../../gawk/main.nf"
3347 process {
@@ -73,14 +87,16 @@ nextflow_process {
7387 }
7488 }
7589
76- run("GCTA_MAKEGRMPART", alias: "GCTA_MAKEGRM_LDMS1 ") {
90+ run("GCTA_MAKEGRMPART", alias: "GCTA_MAKEGRMPART_LDMS1 ") {
7791 script "../../makegrmpart/main.nf"
7892 process {
7993 """
8094 file('plink_simulated.mbfile').text = 'plink_simulated\\n'
8195
8296 input[0] = [
83- [ id:'plink_simulated_ldms1', part_gcta_job:1, nparts_gcta:1 ],
97+ [ id:'plink_simulated_ldms1' ],
98+ 1,
99+ 1,
84100 file('plink_simulated.mbfile'),
85101 [
86102 file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true)
@@ -100,14 +116,16 @@ nextflow_process {
100116 }
101117 }
102118
103- run("GCTA_MAKEGRMPART", alias: "GCTA_MAKEGRM_LDMS2 ") {
119+ run("GCTA_MAKEGRMPART", alias: "GCTA_MAKEGRMPART_LDMS2 ") {
104120 script "../../makegrmpart/main.nf"
105121 process {
106122 """
107123 file('plink_simulated.mbfile').text = 'plink_simulated\\n'
108124
109125 input[0] = [
110- [ id:'plink_simulated_ldms2', part_gcta_job:1, nparts_gcta:1 ],
126+ [ id:'plink_simulated_ldms2' ],
127+ 1,
128+ 1,
111129 file('plink_simulated.mbfile'),
112130 [
113131 file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true)
@@ -134,13 +152,13 @@ nextflow_process {
134152 .of('plink_simulated_ldms1.part_1_1\\nplink_simulated_ldms2.part_1_1')
135153 .collectFile(name:'plink_simulated_ldms.mgrm', newLine: true)
136154
137- ldms_grm_files = GCTA_MAKEGRM_LDMS1 .out.grm_files
138- .mix(GCTA_MAKEGRM_LDMS2 .out.grm_files)
139- .map { meta, grm_id, grm_bin, grm_n_bin -> [grm_id, grm_bin, grm_n_bin] }
155+ ldms_grm_files = GCTA_MAKEGRMPART_LDMS1 .out.grm_files
156+ .mix(GCTA_MAKEGRMPART_LDMS2 .out.grm_files)
157+ .map { meta, grm_files, nparts_gcta, part_gcta_job -> grm_files }
140158 .collect()
141159 .map { rows -> rows.flatten() }
142160
143- input[0] = GAWK_BIVARIATE_PHENO.out.output
161+ input[0] = GAWK_BIVARIATE_PHENO.out.output.map { meta, phenotype_file -> [meta, phenotype_file, [], []] }
144162 input[1] = mgrm_file
145163 .combine(ldms_grm_files)
146164 .map { row -> [[ id:'plink_simulated_ldms' ], row[0], row[1..-1]] }
@@ -187,6 +205,53 @@ nextflow_process {
187205 }
188206 }
189207
208+ test("homo_sapiens popgen - binary bivariate phenotype with ldms mgrm and prevalence") { // Runs the process on the GAWK_BIVARIATE_BINARY_PHENO output with two extra string fields ('1 2' and '0.30 0.25') and the GRM files from both LDMS setup runs.
209+ config "./nextflow.config" // Test-specific Nextflow configuration located next to this test file.
210+ when {
211+ process { // The string below builds the inputs: an mgrm file listing two GRM basenames, the flattened GRM files from GCTA_MAKEGRMPART_LDMS1/2, and two covariate inputs with empty file lists.
212+ """
213+ mgrm_file = Channel
214+ .of('plink_simulated_ldms1.part_1_1\\nplink_simulated_ldms2.part_1_1')
215+ .collectFile(name:'plink_simulated_ldms.mgrm', newLine: true)
216+
217+ ldms_grm_files = GCTA_MAKEGRMPART_LDMS1.out.grm_files
218+ .mix(GCTA_MAKEGRMPART_LDMS2.out.grm_files)
219+ .map { meta, grm_files, nparts_gcta, part_gcta_job -> grm_files }
220+ .collect()
221+ .map { rows -> rows.flatten() }
222+
223+ input[0] = GAWK_BIVARIATE_BINARY_PHENO.out.output.map { meta, phenotype_file -> [meta, phenotype_file, '1 2', '0.30 0.25'] }
224+ input[1] = mgrm_file
225+ .combine(ldms_grm_files)
226+ .map { row -> [[ id:'plink_simulated_ldms' ], row[0], row[1..-1]] }
227+ input[2] = [[ id:'covariates_quant' ], []]
228+ input[3] = [[ id:'covariates_cat' ], []]
229+ """
230+ }
231+ }
232+
233+ then {
234+ assertAll( // Every closure is evaluated so that all failing assertions are reported together.
235+ { assert process.success },
236+ { assert process.out.bivariate_results.size() == 1 }, // Exactly one emission on the results channel.
237+ { assert process.out.log_file.size() == 1 }, // Exactly one emission on the log channel.
238+ { assert process.out.bivariate_results.get(0).get(0).id == "BinaryTrait1__BinaryTrait2" }, // The output meta id matches the id set on the phenotype input.
239+ { assert file(process.out.bivariate_results.get(0).get(1)).name == "BinaryTrait1__BinaryTrait2.hsq" }, // The results file is named after that id.
240+ { assert file(process.out.log_file.get(0).get(1)).name == "BinaryTrait1__BinaryTrait2.log" }, // The log file is named after that id.
241+ {
242+ def logText = file(process.out.log_file.get(0).get(1)).text // Read the whole log file as text.
243+ assert logText.contains("--reml-bivar-prevalence 0.3 0.25") // NOTE(review): the input string was '0.30 0.25'; this expects the log to show 0.3 - confirm against the tool's actual log output.
244+ },
245+ {
246+ assert snapshot( // The snapshot covers the results channel plus every output entry whose key starts with 'versions'.
247+ process.out.bivariate_results,
248+ process.out.findAll { key, val -> key.startsWith('versions') }
249+ ).match()
250+ }
251+ )
252+ }
253+ }
254+
190255 test("homo_sapiens popgen - bivariate phenotype fails when mgrm references missing GRM basename") {
191256 config "./nextflow.config"
192257 when {
@@ -196,13 +261,13 @@ nextflow_process {
196261 .of('plink_simulated_ldms_missing.part_1_1')
197262 .collectFile(name:'plink_simulated_ldms_broken.mgrm', newLine: true)
198263
199- ldms_grm_files = GCTA_MAKEGRM_LDMS1 .out.grm_files
200- .mix(GCTA_MAKEGRM_LDMS2 .out.grm_files)
201- .map { meta, grm_id, grm_bin, grm_n_bin -> [grm_id, grm_bin, grm_n_bin] }
264+ ldms_grm_files = GCTA_MAKEGRMPART_LDMS1 .out.grm_files
265+ .mix(GCTA_MAKEGRMPART_LDMS2 .out.grm_files)
266+ .map { meta, grm_files, nparts_gcta, part_gcta_job -> grm_files }
202267 .collect()
203268 .map { rows -> rows.flatten() }
204269
205- input[0] = GAWK_BIVARIATE_PHENO.out.output
270+ input[0] = GAWK_BIVARIATE_PHENO.out.output.map { meta, phenotype_file -> [meta, phenotype_file, [], []] }
206271 input[1] = broken_mgrm_file
207272 .combine(ldms_grm_files)
208273 .map { row -> [[ id:'plink_simulated_ldms' ], row[0], row[1..-1]] }
@@ -231,13 +296,13 @@ nextflow_process {
231296 .of('plink_simulated_ldms1.part_1_1\\nplink_simulated_ldms2.part_1_1')
232297 .collectFile(name:'plink_simulated_ldms.mgrm', newLine: true)
233298
234- ldms_grm_files = GCTA_MAKEGRM_LDMS1 .out.grm_files
235- .mix(GCTA_MAKEGRM_LDMS2 .out.grm_files)
236- .map { meta, grm_id, grm_bin, grm_n_bin -> [grm_id, grm_bin, grm_n_bin] }
299+ ldms_grm_files = GCTA_MAKEGRMPART_LDMS1 .out.grm_files
300+ .mix(GCTA_MAKEGRMPART_LDMS2 .out.grm_files)
301+ .map { meta, grm_files, nparts_gcta, part_gcta_job -> grm_files }
237302 .collect()
238303 .map { rows -> rows.flatten() }
239304
240- input[0] = GAWK_BIVARIATE_PHENO.out.output
305+ input[0] = GAWK_BIVARIATE_PHENO.out.output.map { meta, phenotype_file -> [meta, phenotype_file, [], []] }
241306 input[1] = mgrm_file
242307 .combine(ldms_grm_files)
243308 .map { row -> [[ id:'plink_simulated_ldms' ], row[0], row[1..-1]] }
0 commit comments