Skip to content

Commit 2aca112

Browse files
committed
test(gcta/fastgwa): cover plink2 and lr execution
1 parent e97517e commit 2aca112

4 files changed

Lines changed: 192 additions & 87 deletions

File tree

modules/nf-core/gcta/fastgwa/meta.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
22
name: "gcta_fastgwa"
3-
description: Run GCTA fastGWA-MLM with PLINK genotype inputs and a sparse GRM
3+
description: Run GCTA fastGWA association modes (`--fastGWA-mlm`, `--fastGWA-mlm-exact`, and `--fastGWA-lr`) with PLINK genotype inputs
44
keywords:
55
- gcta
66
- fastgwa
modules/nf-core/gcta/fastgwa/tests/helpers/gcta_prepare_sparse/main.nf

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
// Test helper process: builds a dense GRM from a PLINK bed/bim/fam trio with
// `gcta --make-grm`, then thresholds it with `gcta --make-bK-sparse` and emits
// the resulting `<meta.id>.grm.id` / `<meta.id>.grm.sp` pair on `sparse_grm`.
process GCTA_TEST_PREPARE_SPARSE {
2+
tag "${meta.id}"
3+
label "process_medium"
4+
// Conda environment file is resolved three directories above this helper's moduleDir.
conda "${moduleDir}/../../../environment.yml"
5+
// Uses the `docker://`-prefixed image URI when the engine is singularity and
// `task.ext.singularity_pull_docker_container` is not set; otherwise the plain image name.
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
6+
'docker://community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' :
7+
'community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' }"
8+
9+
// Inputs: a meta map with the PLINK bed/bim/fam files, plus the value that is
// passed verbatim to `--make-bK-sparse`.
input:
10+
tuple val(meta), path(bed), path(bim), path(fam)
11+
val(grm_cutoff)
12+
13+
// Output file names are derived from meta.id, matching `sparse_prefix` below.
output:
14+
tuple val(meta), path("${meta.id}.grm.id"), path("${meta.id}.grm.sp"), emit: sparse_grm
15+
16+
script:
17+
// `--bfile` takes a prefix, so drop the trailing `.bed` from the staged file name.
// NOTE(review): presumably bim/fam share the same basename as bed — confirm against callers.
def bfile_prefix = bed.name.replaceFirst(/\.bed$/, "")
18+
// The intermediate dense GRM is written under `<meta.id>_dense`, separate from
// the sparse prefix, and is not declared as an output.
def dense_prefix = "${meta.id}_dense"
19+
def sparse_prefix = meta.id
20+
21+
"""
22+
set -euo pipefail
23+
24+
gcta \\
25+
--bfile ${bfile_prefix} \\
26+
--make-grm \\
27+
--out ${dense_prefix} \\
28+
--thread-num ${task.cpus}
29+
30+
gcta \\
31+
--grm ${dense_prefix} \\
32+
--make-bK-sparse ${grm_cutoff} \\
33+
--out ${sparse_prefix} \\
34+
--thread-num ${task.cpus}
35+
"""
36+
}

modules/nf-core/gcta/fastgwa/tests/main.nf.test

Lines changed: 112 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,8 @@ nextflow_process {
88
tag "modules_nfcore"
99
tag "gcta"
1010
tag "gcta/fastgwa"
11-
tag "gcta/makegrmpart"
12-
tag "gcta/makebksparse"
1311
tag "gawk"
12+
tag "gcta/testhelpers"
1413
config "./nextflow.config"
1514

1615
setup {
@@ -84,41 +83,17 @@ nextflow_process {
8483
}
8584
}
8685

87-
run("GCTA_MAKEGRMPART", alias: "GCTA_MAKEGRMPART_DENSE") {
88-
script "../../makegrmpart/main.nf"
86+
run("GCTA_TEST_PREPARE_SPARSE", alias: "GCTA_TEST_PREPARE_SPARSE_DENSE") {
87+
script "./helpers/gcta_prepare_sparse/main.nf"
8988
process {
9089
"""
91-
file('plink_simulated.mbfile').text = 'plink_simulated\\n'
92-
9390
input[0] = [
94-
[ id:'plink_simulated_dense', part_gcta_job:1, nparts_gcta:1 ],
95-
file('plink_simulated.mbfile'),
96-
[
97-
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true)
98-
],
99-
[
100-
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true)
101-
],
102-
[
103-
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true)
104-
]
91+
[ id:'plink_simulated_dense' ],
92+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true),
93+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true),
94+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true)
10595
]
106-
input[1] = [[ id:'all_variants' ], []]
107-
"""
108-
}
109-
}
110-
111-
run("GCTA_MAKEBKSPARSE", alias: "GCTA_MAKEBKSPARSE_DENSE") {
112-
script "../../makebksparse/main.nf"
113-
process {
114-
"""
115-
dense_grm = GCTA_MAKEGRMPART_DENSE.out.grm_files.map { meta, grm_id, grm_bin, grm_n_bin ->
116-
def prefix = meta.id + '.part_' + meta.nparts_gcta + '_' + meta.part_gcta_job
117-
[[ id:prefix ], grm_id, grm_bin, grm_n_bin]
118-
}
119-
120-
input[0] = dense_grm
121-
input[1] = Channel.value(0.05)
96+
input[1] = 0.05
12297
"""
12398
}
12499
}
@@ -128,10 +103,6 @@ nextflow_process {
128103
when {
129104
process {
130105
"""
131-
sparse_grm = GCTA_MAKEBKSPARSE_DENSE.out.sparse_grm_files.map { meta, sparse_grm_id, sparse_grm_sp ->
132-
[[ id:meta.id + '_sp' ], sparse_grm_id, sparse_grm_sp]
133-
}
134-
135106
quantitative_pheno = GAWK_QUANTITATIVE_PHENO.out.output.map { meta, phenotype_file ->
136107
[[ id:meta.id, is_binary:false ], phenotype_file]
137108
}
@@ -142,7 +113,7 @@ nextflow_process {
142113
file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.bim", checkIfExists: true),
143114
file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.fam", checkIfExists: true)
144115
]
145-
input[1] = sparse_grm
116+
input[1] = GCTA_TEST_PREPARE_SPARSE_DENSE.out.sparse_grm
146117
input[2] = quantitative_pheno
147118
input[3] = GAWK_QUANTITATIVE_COVARIATES.out.output
148119
input[4] = GAWK_CATEGORICAL_COVARIATES.out.output
@@ -159,6 +130,9 @@ nextflow_process {
159130
{ assert process.out.results.get(0).get(0).id == "plink_simulated" },
160131
{ assert process.out.results.get(0).get(1).id == "QuantitativeTrait" },
161132
{ assert path(process.out.results.get(0).get(2)).fileName.toString() == "plink_simulated_QuantitativeTrait.fastGWA" },
133+
{ assert path(process.out.results.get(0).get(2)).readLines().get(0).contains("BETA") },
134+
{ assert file(path(process.out.results.get(0).get(2)).parent.toString() + "/.command.sh").text.contains("--fastGWA-mlm-exact") },
135+
{ assert file(path(process.out.results.get(0).get(2)).parent.toString() + "/.command.sh").text.contains("--grm-sparse plink_simulated_dense") },
162136
{
163137
assert snapshot(
164138
process.out.results,
@@ -173,10 +147,6 @@ nextflow_process {
173147
when {
174148
process {
175149
"""
176-
sparse_grm = GCTA_MAKEBKSPARSE_DENSE.out.sparse_grm_files.map { meta, sparse_grm_id, sparse_grm_sp ->
177-
[[ id:meta.id + '_sp' ], sparse_grm_id, sparse_grm_sp]
178-
}
179-
180150
multi_pheno = GAWK_MULTI_PHENO.out.output.map { meta, phenotype_file ->
181151
[[ id:'QuantitativeTraitMpheno2', is_binary:false ], phenotype_file]
182152
}
@@ -187,7 +157,7 @@ nextflow_process {
187157
file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.bim", checkIfExists: true),
188158
file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.fam", checkIfExists: true)
189159
]
190-
input[1] = sparse_grm
160+
input[1] = GCTA_TEST_PREPARE_SPARSE_DENSE.out.sparse_grm
191161
input[2] = multi_pheno
192162
input[3] = GAWK_QUANTITATIVE_COVARIATES.out.output
193163
input[4] = GAWK_CATEGORICAL_COVARIATES.out.output
@@ -204,6 +174,8 @@ nextflow_process {
204174
{ assert process.out.results.get(0).get(0).id == "plink_simulated" },
205175
{ assert process.out.results.get(0).get(1).id == "QuantitativeTraitMpheno2" },
206176
{ assert path(process.out.results.get(0).get(2)).fileName.toString() == "plink_simulated_QuantitativeTraitMpheno2.fastGWA" },
177+
{ assert path(process.out.results.get(0).get(2)).readLines().get(0).contains("BETA") },
178+
{ assert file(path(process.out.results.get(0).get(2)).parent.toString() + "/.command.sh").text.contains("--mpheno 2") },
207179
{
208180
assert snapshot(
209181
process.out.results,
@@ -218,21 +190,22 @@ nextflow_process {
218190
when {
219191
process {
220192
"""
221-
sparse_grm = GCTA_MAKEBKSPARSE_DENSE.out.sparse_grm_files.map { meta, sparse_grm_id, sparse_grm_sp ->
222-
[[ id:meta.id + '_sp' ], sparse_grm_id, sparse_grm_sp]
223-
}
224-
225193
binary_pheno = GAWK_BINARY_PHENO.out.output.map { meta, phenotype_file ->
226194
[[ id:meta.id, is_binary:true ], phenotype_file]
227195
}
228196

197+
unused_sparse_id = file("${workDir}/unused_sparse.grm.id")
198+
unused_sparse_id.text = "S1 S1\\n"
199+
unused_sparse_sp = file("${workDir}/unused_sparse.grm.sp")
200+
unused_sparse_sp.text = "1 1 1\\n"
201+
229202
input[0] = [
230203
[ id:"plink_simulated" ],
231204
file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.bed", checkIfExists: true),
232205
file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.bim", checkIfExists: true),
233206
file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.fam", checkIfExists: true)
234207
]
235-
input[1] = sparse_grm
208+
input[1] = [[ id:'unused_sparse' ], unused_sparse_id, unused_sparse_sp]
236209
input[2] = binary_pheno
237210
input[3] = GAWK_QUANTITATIVE_COVARIATES.out.output
238211
input[4] = GAWK_CATEGORICAL_COVARIATES.out.output
@@ -249,6 +222,8 @@ nextflow_process {
249222
{ assert process.out.results.get(0).get(0).id == "plink_simulated" },
250223
{ assert process.out.results.get(0).get(1).id == "BinaryTrait" },
251224
{ assert path(process.out.results.get(0).get(2)).fileName.toString() == "plink_simulated_BinaryTrait.fastGWA" },
225+
{ assert file(path(process.out.results.get(0).get(2)).parent.toString() + "/.command.sh").text.contains("--fastGWA-lr") },
226+
{ assert !file(path(process.out.results.get(0).get(2)).parent.toString() + "/.command.sh").text.contains("--grm-sparse") },
252227
{
253228
assert snapshot(
254229
process.out.results,
@@ -259,16 +234,92 @@ nextflow_process {
259234
}
260235
}
261236

262-
test("homo_sapiens popgen - plink2 with sparse GRM - stub") {
263-
options "-stub"
237+
test("homo_sapiens popgen - plink2 with sparse GRM and quantitative phenotype") {
238+
239+
when {
240+
process {
241+
"""
242+
quantitative_pheno = GAWK_QUANTITATIVE_PHENO.out.output.map { meta, phenotype_file ->
243+
[[ id:meta.id, is_binary:false ], phenotype_file]
244+
}
245+
246+
input[0] = [
247+
[ id:"plink_simulated" ],
248+
file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.pgen", checkIfExists: true),
249+
file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.pvar", checkIfExists: true),
250+
file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.psam", checkIfExists: true)
251+
]
252+
input[1] = GCTA_TEST_PREPARE_SPARSE_DENSE.out.sparse_grm
253+
input[2] = quantitative_pheno
254+
input[3] = [[ id:'covariates_quant' ], []]
255+
input[4] = [[ id:'covariates_cat' ], []]
256+
input[5] = false
257+
input[6] = 1
258+
"""
259+
}
260+
}
261+
262+
then {
263+
assertAll(
264+
{ assert process.success },
265+
{ assert process.out.results.size() == 1 },
266+
{ assert process.out.results.get(0).get(0).id == "plink_simulated" },
267+
{ assert process.out.results.get(0).get(1).id == "QuantitativeTrait" },
268+
{ assert path(process.out.results.get(0).get(2)).fileName.toString() == "plink_simulated_QuantitativeTrait.fastGWA" },
269+
{ assert path(process.out.results.get(0).get(2)).readLines().get(0).contains("BETA") },
270+
{
271+
assert snapshot(
272+
process.out.results,
273+
process.out.findAll { key, val -> key.startsWith('versions') }
274+
).match()
275+
}
276+
)
277+
}
278+
}
264279

280+
test("homo_sapiens popgen - non-binary fails when sparse GRM prefix mismatches files") {
265281
when {
266282
process {
267283
"""
268-
sparse_grm = GCTA_MAKEBKSPARSE_DENSE.out.sparse_grm_files.map { meta, sparse_grm_id, sparse_grm_sp ->
269-
[[ id:meta.id + '_sp' ], sparse_grm_id, sparse_grm_sp]
284+
quantitative_pheno = GAWK_QUANTITATIVE_PHENO.out.output.map { meta, phenotype_file ->
285+
[[ id:meta.id, is_binary:false ], phenotype_file]
286+
}
287+
288+
sparse_grm_bad_prefix = GCTA_TEST_PREPARE_SPARSE_DENSE.out.sparse_grm.map { meta, sparse_grm_id, sparse_grm_sp ->
289+
[[ id:'incorrect_sparse_prefix' ], sparse_grm_id, sparse_grm_sp]
270290
}
271291

292+
input[0] = [
293+
[ id:"plink_simulated" ],
294+
file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.bed", checkIfExists: true),
295+
file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.bim", checkIfExists: true),
296+
file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.fam", checkIfExists: true)
297+
]
298+
input[1] = sparse_grm_bad_prefix
299+
input[2] = quantitative_pheno
300+
input[3] = [[ id:'covariates_quant' ], []]
301+
input[4] = [[ id:'covariates_cat' ], []]
302+
input[5] = false
303+
input[6] = 1
304+
"""
305+
}
306+
}
307+
308+
then {
309+
assertAll(
310+
{ assert !process.success },
311+
{ assert process.exitStatus != 0 },
312+
{ assert process.stdout.toString().contains("incorrect_sparse_prefix") }
313+
)
314+
}
315+
}
316+
317+
test("homo_sapiens popgen - plink2 with sparse GRM - stub") {
318+
options "-stub"
319+
320+
when {
321+
process {
322+
"""
272323
quantitative_pheno = GAWK_QUANTITATIVE_PHENO.out.output.map { meta, phenotype_file ->
273324
[[ id:meta.id, is_binary:false ], phenotype_file]
274325
}
@@ -279,7 +330,7 @@ nextflow_process {
279330
file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.pvar", checkIfExists: true),
280331
file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.psam", checkIfExists: true)
281332
]
282-
input[1] = sparse_grm
333+
input[1] = GCTA_TEST_PREPARE_SPARSE_DENSE.out.sparse_grm
283334
input[2] = quantitative_pheno
284335
input[3] = [[ id:'covariates_quant' ], []]
285336
input[4] = [[ id:'covariates_cat' ], []]
@@ -292,7 +343,14 @@ nextflow_process {
292343
then {
293344
assertAll(
294345
{ assert process.success },
295-
{ assert snapshot(process.out).match() }
346+
{ assert process.out.results.size() == 1 },
347+
{ assert process.out.results.get(0).get(0).id == "plink_simulated" },
348+
{
349+
assert snapshot(
350+
process.out.results,
351+
process.out.findAll { key, val -> key.startsWith('versions') }
352+
).match()
353+
}
296354
)
297355
}
298356
}

0 commit comments

Comments
 (0)