@@ -8,9 +8,8 @@ nextflow_process {
88 tag "modules_nfcore"
99 tag "gcta"
1010 tag "gcta/fastgwa"
11- tag "gcta/makegrmpart"
12- tag "gcta/makebksparse"
1311 tag "gawk"
12+ tag "gcta/testhelpers"
1413 config "./nextflow.config"
1514
1615 setup {
@@ -84,41 +83,17 @@ nextflow_process {
8483 }
8584 }
8685
87- run("GCTA_MAKEGRMPART", alias: "GCTA_MAKEGRMPART_DENSE") {
88- script "../../makegrmpart/main.nf"
86+ run("GCTA_TEST_PREPARE_SPARSE", alias: "GCTA_TEST_PREPARE_SPARSE_DENSE") {
87+ script "./helpers/gcta_prepare_sparse/main.nf"
8988 process {
9089 """
91- file('plink_simulated.mbfile').text = 'plink_simulated\\n'
92-
9390 input[0] = [
94- [ id:'plink_simulated_dense', part_gcta_job:1, nparts_gcta:1 ],
95- file('plink_simulated.mbfile'),
96- [
97- file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true)
98- ],
99- [
100- file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true)
101- ],
102- [
103- file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true)
104- ]
91+ [ id:'plink_simulated_dense' ],
92+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true),
93+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true),
94+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true)
10595 ]
106- input[1] = [[ id:'all_variants' ], []]
107- """
108- }
109- }
110-
111- run("GCTA_MAKEBKSPARSE", alias: "GCTA_MAKEBKSPARSE_DENSE") {
112- script "../../makebksparse/main.nf"
113- process {
114- """
115- dense_grm = GCTA_MAKEGRMPART_DENSE.out.grm_files.map { meta, grm_id, grm_bin, grm_n_bin ->
116- def prefix = meta.id + '.part_' + meta.nparts_gcta + '_' + meta.part_gcta_job
117- [[ id:prefix ], grm_id, grm_bin, grm_n_bin]
118- }
119-
120- input[0] = dense_grm
121- input[1] = Channel.value(0.05)
96+ input[1] = 0.05
12297 """
12398 }
12499 }
@@ -128,10 +103,6 @@ nextflow_process {
128103 when {
129104 process {
130105 """
131- sparse_grm = GCTA_MAKEBKSPARSE_DENSE.out.sparse_grm_files.map { meta, sparse_grm_id, sparse_grm_sp ->
132- [[ id:meta.id + '_sp' ], sparse_grm_id, sparse_grm_sp]
133- }
134-
135106 quantitative_pheno = GAWK_QUANTITATIVE_PHENO.out.output.map { meta, phenotype_file ->
136107 [[ id:meta.id, is_binary:false ], phenotype_file]
137108 }
@@ -142,7 +113,7 @@ nextflow_process {
142113 file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.bim", checkIfExists: true),
143114 file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.fam", checkIfExists: true)
144115 ]
145- input[1] = sparse_grm
116+ input[1] = GCTA_TEST_PREPARE_SPARSE_DENSE.out.sparse_grm
146117 input[2] = quantitative_pheno
147118 input[3] = GAWK_QUANTITATIVE_COVARIATES.out.output
148119 input[4] = GAWK_CATEGORICAL_COVARIATES.out.output
@@ -159,6 +130,9 @@ nextflow_process {
159130 { assert process.out.results.get(0).get(0).id == "plink_simulated" },
160131 { assert process.out.results.get(0).get(1).id == "QuantitativeTrait" },
161132 { assert path(process.out.results.get(0).get(2)).fileName.toString() == "plink_simulated_QuantitativeTrait.fastGWA" },
133+ { assert path(process.out.results.get(0).get(2)).readLines().get(0).contains("BETA") },
134+ { assert file(path(process.out.results.get(0).get(2)).parent.toString() + "/.command.sh").text.contains("--fastGWA-mlm-exact") },
135+ { assert file(path(process.out.results.get(0).get(2)).parent.toString() + "/.command.sh").text.contains("--grm-sparse plink_simulated_dense") },
162136 {
163137 assert snapshot(
164138 process.out.results,
@@ -173,10 +147,6 @@ nextflow_process {
173147 when {
174148 process {
175149 """
176- sparse_grm = GCTA_MAKEBKSPARSE_DENSE.out.sparse_grm_files.map { meta, sparse_grm_id, sparse_grm_sp ->
177- [[ id:meta.id + '_sp' ], sparse_grm_id, sparse_grm_sp]
178- }
179-
180150 multi_pheno = GAWK_MULTI_PHENO.out.output.map { meta, phenotype_file ->
181151 [[ id:'QuantitativeTraitMpheno2', is_binary:false ], phenotype_file]
182152 }
@@ -187,7 +157,7 @@ nextflow_process {
187157 file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.bim", checkIfExists: true),
188158 file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.fam", checkIfExists: true)
189159 ]
190- input[1] = sparse_grm
160+ input[1] = GCTA_TEST_PREPARE_SPARSE_DENSE.out.sparse_grm
191161 input[2] = multi_pheno
192162 input[3] = GAWK_QUANTITATIVE_COVARIATES.out.output
193163 input[4] = GAWK_CATEGORICAL_COVARIATES.out.output
@@ -204,6 +174,8 @@ nextflow_process {
204174 { assert process.out.results.get(0).get(0).id == "plink_simulated" },
205175 { assert process.out.results.get(0).get(1).id == "QuantitativeTraitMpheno2" },
206176 { assert path(process.out.results.get(0).get(2)).fileName.toString() == "plink_simulated_QuantitativeTraitMpheno2.fastGWA" },
177+ { assert path(process.out.results.get(0).get(2)).readLines().get(0).contains("BETA") },
178+ { assert file(path(process.out.results.get(0).get(2)).parent.toString() + "/.command.sh").text.contains("--mpheno 2") },
207179 {
208180 assert snapshot(
209181 process.out.results,
@@ -218,21 +190,22 @@ nextflow_process {
218190 when {
219191 process {
220192 """
221- sparse_grm = GCTA_MAKEBKSPARSE_DENSE.out.sparse_grm_files.map { meta, sparse_grm_id, sparse_grm_sp ->
222- [[ id:meta.id + '_sp' ], sparse_grm_id, sparse_grm_sp]
223- }
224-
225193 binary_pheno = GAWK_BINARY_PHENO.out.output.map { meta, phenotype_file ->
226194 [[ id:meta.id, is_binary:true ], phenotype_file]
227195 }
228196
197+ unused_sparse_id = file("${workDir}/unused_sparse.grm.id")
198+ unused_sparse_id.text = "S1 S1\\n"
199+ unused_sparse_sp = file("${workDir}/unused_sparse.grm.sp")
200+ unused_sparse_sp.text = "1 1 1\\n"
201+
229202 input[0] = [
230203 [ id:"plink_simulated" ],
231204 file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.bed", checkIfExists: true),
232205 file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.bim", checkIfExists: true),
233206 file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.fam", checkIfExists: true)
234207 ]
235- input[1] = sparse_grm
208+ input[1] = [[ id:'unused_sparse' ], unused_sparse_id, unused_sparse_sp]
236209 input[2] = binary_pheno
237210 input[3] = GAWK_QUANTITATIVE_COVARIATES.out.output
238211 input[4] = GAWK_CATEGORICAL_COVARIATES.out.output
@@ -249,6 +222,8 @@ nextflow_process {
249222 { assert process.out.results.get(0).get(0).id == "plink_simulated" },
250223 { assert process.out.results.get(0).get(1).id == "BinaryTrait" },
251224 { assert path(process.out.results.get(0).get(2)).fileName.toString() == "plink_simulated_BinaryTrait.fastGWA" },
225+ { assert file(path(process.out.results.get(0).get(2)).parent.toString() + "/.command.sh").text.contains("--fastGWA-lr") },
226+ { assert !file(path(process.out.results.get(0).get(2)).parent.toString() + "/.command.sh").text.contains("--grm-sparse") },
252227 {
253228 assert snapshot(
254229 process.out.results,
@@ -259,16 +234,92 @@ nextflow_process {
259234 }
260235 }
261236
262- test("homo_sapiens popgen - plink2 with sparse GRM - stub") {
263- options "-stub"
237+ test("homo_sapiens popgen - plink2 with sparse GRM and quantitative phenotype") {
238+
239+ when {
240+ process {
241+ """
242+ quantitative_pheno = GAWK_QUANTITATIVE_PHENO.out.output.map { meta, phenotype_file ->
243+ [[ id:meta.id, is_binary:false ], phenotype_file]
244+ }
245+
246+ input[0] = [
247+ [ id:"plink_simulated" ],
248+ file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.pgen", checkIfExists: true),
249+ file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.pvar", checkIfExists: true),
250+ file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.psam", checkIfExists: true)
251+ ]
252+ input[1] = GCTA_TEST_PREPARE_SPARSE_DENSE.out.sparse_grm
253+ input[2] = quantitative_pheno
254+ input[3] = [[ id:'covariates_quant' ], []]
255+ input[4] = [[ id:'covariates_cat' ], []]
256+ input[5] = false
257+ input[6] = 1
258+ """
259+ }
260+ }
261+
262+ then {
263+ assertAll(
264+ { assert process.success },
265+ { assert process.out.results.size() == 1 },
266+ { assert process.out.results.get(0).get(0).id == "plink_simulated" },
267+ { assert process.out.results.get(0).get(1).id == "QuantitativeTrait" },
268+ { assert path(process.out.results.get(0).get(2)).fileName.toString() == "plink_simulated_QuantitativeTrait.fastGWA" },
269+ { assert path(process.out.results.get(0).get(2)).readLines().get(0).contains("BETA") },
270+ {
271+ assert snapshot(
272+ process.out.results,
273+ process.out.findAll { key, val -> key.startsWith('versions') }
274+ ).match()
275+ }
276+ )
277+ }
278+ }
264279
280+ test("homo_sapiens popgen - non-binary fails when sparse GRM prefix mismatches files") {
265281 when {
266282 process {
267283 """
268- sparse_grm = GCTA_MAKEBKSPARSE_DENSE.out.sparse_grm_files.map { meta, sparse_grm_id, sparse_grm_sp ->
269- [[ id:meta.id + '_sp' ], sparse_grm_id, sparse_grm_sp]
284+ quantitative_pheno = GAWK_QUANTITATIVE_PHENO.out.output.map { meta, phenotype_file ->
285+ [[ id:meta.id, is_binary:false ], phenotype_file]
286+ }
287+
288+ sparse_grm_bad_prefix = GCTA_TEST_PREPARE_SPARSE_DENSE.out.sparse_grm.map { meta, sparse_grm_id, sparse_grm_sp ->
289+ [[ id:'incorrect_sparse_prefix' ], sparse_grm_id, sparse_grm_sp]
270290 }
271291
292+ input[0] = [
293+ [ id:"plink_simulated" ],
294+ file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.bed", checkIfExists: true),
295+ file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.bim", checkIfExists: true),
296+ file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.fam", checkIfExists: true)
297+ ]
298+ input[1] = sparse_grm_bad_prefix
299+ input[2] = quantitative_pheno
300+ input[3] = [[ id:'covariates_quant' ], []]
301+ input[4] = [[ id:'covariates_cat' ], []]
302+ input[5] = false
303+ input[6] = 1
304+ """
305+ }
306+ }
307+
308+ then {
309+ assertAll(
310+ { assert !process.success },
311+ { assert process.exitStatus != 0 },
312+ { assert process.stdout.toString().contains("incorrect_sparse_prefix") }
313+ )
314+ }
315+ }
316+
317+ test("homo_sapiens popgen - plink2 with sparse GRM - stub") {
318+ options "-stub"
319+
320+ when {
321+ process {
322+ """
272323 quantitative_pheno = GAWK_QUANTITATIVE_PHENO.out.output.map { meta, phenotype_file ->
273324 [[ id:meta.id, is_binary:false ], phenotype_file]
274325 }
@@ -279,7 +330,7 @@ nextflow_process {
279330 file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.pvar", checkIfExists: true),
280331 file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.psam", checkIfExists: true)
281332 ]
282- input[1] = sparse_grm
333+ input[1] = GCTA_TEST_PREPARE_SPARSE_DENSE.out.sparse_grm
283334 input[2] = quantitative_pheno
284335 input[3] = [[ id:'covariates_quant' ], []]
285336 input[4] = [[ id:'covariates_cat' ], []]
@@ -292,7 +343,14 @@ nextflow_process {
292343 then {
293344 assertAll(
294345 { assert process.success },
295- { assert snapshot(process.out).match() }
346+ { assert process.out.results.size() == 1 },
347+ { assert process.out.results.get(0).get(0).id == "plink_simulated" },
348+ {
349+ assert snapshot(
350+ process.out.results,
351+ process.out.findAll { key, val -> key.startsWith('versions') }
352+ ).match()
353+ }
296354 )
297355 }
298356 }
0 commit comments