Skip to content

Commit 7871dee

Browse files
committed
Normalize gcta/bivariateremlldms LDMS setup
1 parent c5343eb commit 7871dee

9 files changed

Lines changed: 341 additions & 123 deletions

File tree

modules/nf-core/gcta/bivariateremlldms/main.nf

Lines changed: 9 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -2,20 +2,20 @@ process GCTA_BIVARIATEREMLLDMS {
22
tag "bivariate_reml_ldms_${meta.id}_${meta2.id}"
33
label 'process_medium'
44
conda "${moduleDir}/environment.yml"
5-
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
6-
'docker://community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' :
7-
'community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' }"
5+
container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
6+
? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/46/46b0d05f0daa47561d87d2a9cac5e51edc2c78e26f1bbab439c688386241a274/data'
7+
: 'community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9'}"
88

99
input:
10-
tuple val(meta), path(phenotype_file)
10+
tuple val(meta), path(phenotype_file), val(mpheno), val(prevalence)
1111
tuple val(meta2), path(mgrm_file), path(grm_files)
1212
tuple val(meta3), path(quant_covariates_file)
1313
tuple val(meta4), path(cat_covariates_file)
1414

1515
output:
1616
tuple val(meta), path("*.hsq"), emit: bivariate_results
1717
tuple val(meta), path("*.log"), emit: log_file
18-
tuple val("${task.process}"), val("gcta"), eval("gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//'"), emit: versions_gcta, topic: versions
18+
tuple val("${task.process}"), val("gcta"), eval("gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'"), emit: versions_gcta, topic: versions
1919

2020
when:
2121
task.ext.when == null || task.ext.when
@@ -26,13 +26,16 @@ process GCTA_BIVARIATEREMLLDMS {
2626
def qcovar_param = quant_covariates_file ? "--qcovar ${quant_covariates_file}" : ''
2727
def covar_param = cat_covariates_file ? "--covar ${cat_covariates_file}" : ''
2828
def extra_args = task.ext.args ?: ''
29+
def bivar_traits = mpheno ?: '1 2'
30+
def prevalence_param = prevalence ? "--reml-bivar-prevalence ${prevalence}" : ''
2931

3032
"""
3133
3234
gcta \\
33-
--reml-bivar 1 2 \\
35+
--reml-bivar ${bivar_traits} \\
3436
--mgrm ${mgrm_file} \\
3537
--pheno "${phenotype_file}" \\
38+
${prevalence_param} \\
3639
${qcovar_param} \\
3740
${covar_param} \\
3841
--reml-bivar-no-constrain \\

modules/nf-core/gcta/bivariateremlldms/meta.yml

Lines changed: 23 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -3,16 +3,23 @@ name: "gcta_bivariateremlldms"
33
description: Run bivariate REML-LDMS analysis with an MGRM manifest
44
keywords:
55
- gcta
6+
- genome-wide complex trait analysis
67
- reml
8+
- restricted maximum likelihood
79
- bivariate
810
- ldms
11+
- linkage disequilibrium and minor allele frequency stratification
12+
- grm
13+
- genetic relationship matrix
914
- genetics
1015
tools:
1116
- "gcta":
1217
description: "Genome-wide Complex Trait Analysis (GCTA) estimates genetic relationships, variance components, and association statistics from genome-wide data."
1318
homepage: "https://yanglab.westlake.edu.cn/software/gcta/"
1419
documentation: "https://yanglab.westlake.edu.cn/software/gcta/static/gcta_doc_latest.pdf"
1520
tool_dev_url: "https://yanglab.westlake.edu.cn/software/gcta/"
21+
licence: ["GPL-3.0-only"]
22+
identifier: "biotools:gcta"
1623
input:
1724
- - meta:
1825
type: map
@@ -25,6 +32,16 @@ input:
2532
pattern: "*.{phe,pheno,txt,tsv}"
2633
ontologies:
2734
- edam: "http://edamontology.org/format_3475"
35+
- mpheno:
36+
type: string
37+
description: |
38+
Optional pair of phenotype column indices passed to `--reml-bivar`;
39+
pass `[]` to use the module default of `1 2`
40+
- prevalence:
41+
type: string
42+
description: |
43+
Optional pair of disease prevalence values passed to `--reml-bivar-prevalence`
44+
for binary traits; pass `[]` for quantitative traits
2845
- - meta2:
2946
type: map
3047
description: |
@@ -38,8 +55,8 @@ input:
3855
- edam: "http://edamontology.org/format_2330"
3956
- grm_files:
4057
type: file
41-
description: GRM sidecar files referenced by `mgrm_file`
42-
pattern: "*"
58+
description: GRM bundles referenced by `mgrm_file`
59+
pattern: "*.grm.*"
4360
ontologies: []
4461
- - meta3:
4562
type: map
@@ -95,7 +112,7 @@ output:
95112
- "gcta":
96113
type: string
97114
description: The tool name
98-
- "gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//'":
115+
- "gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'":
99116
type: eval
100117
description: The command used to retrieve the GCTA version
101118
topics:
@@ -106,10 +123,10 @@ topics:
106123
- gcta:
107124
type: string
108125
description: The tool name
109-
- gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//':
126+
- "gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'":
110127
type: eval
111128
description: The command used to retrieve the GCTA version
112129
authors:
113-
- "@andongni"
130+
- "@lyh970817"
114131
maintainers:
115-
- "@andongni"
132+
- "@lyh970817"

modules/nf-core/gcta/bivariateremlldms/tests/main.nf.test

Lines changed: 81 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -28,6 +28,20 @@ nextflow_process {
2828
}
2929
}
3030

31+
run("GAWK", alias: "GAWK_BIVARIATE_BINARY_PHENO") {
32+
script "../../../gawk/main.nf"
33+
process {
34+
"""
35+
input[0] = [
36+
[ id:'BinaryTrait1__BinaryTrait2' ],
37+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true)
38+
]
39+
input[1] = Channel.of('FNR == 1 { next } { print \$1, \$2, (NR % 2) + 1, (NR % 3 == 0 ? 2 : 1) }').collectFile(name:'bivariate_binary_phenotype.awk')
40+
input[2] = false
41+
"""
42+
}
43+
}
44+
3145
run("GAWK", alias: "GAWK_QUANTITATIVE_COVARIATES") {
3246
script "../../../gawk/main.nf"
3347
process {
@@ -73,14 +87,16 @@ nextflow_process {
7387
}
7488
}
7589

76-
run("GCTA_MAKEGRMPART", alias: "GCTA_MAKEGRM_LDMS1") {
90+
run("GCTA_MAKEGRMPART", alias: "GCTA_MAKEGRMPART_LDMS1") {
7791
script "../../makegrmpart/main.nf"
7892
process {
7993
"""
8094
file('plink_simulated.mbfile').text = 'plink_simulated\\n'
8195

8296
input[0] = [
83-
[ id:'plink_simulated_ldms1', part_gcta_job:1, nparts_gcta:1 ],
97+
[ id:'plink_simulated_ldms1' ],
98+
1,
99+
1,
84100
file('plink_simulated.mbfile'),
85101
[
86102
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true)
@@ -100,14 +116,16 @@ nextflow_process {
100116
}
101117
}
102118

103-
run("GCTA_MAKEGRMPART", alias: "GCTA_MAKEGRM_LDMS2") {
119+
run("GCTA_MAKEGRMPART", alias: "GCTA_MAKEGRMPART_LDMS2") {
104120
script "../../makegrmpart/main.nf"
105121
process {
106122
"""
107123
file('plink_simulated.mbfile').text = 'plink_simulated\\n'
108124

109125
input[0] = [
110-
[ id:'plink_simulated_ldms2', part_gcta_job:1, nparts_gcta:1 ],
126+
[ id:'plink_simulated_ldms2' ],
127+
1,
128+
1,
111129
file('plink_simulated.mbfile'),
112130
[
113131
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true)
@@ -134,13 +152,13 @@ nextflow_process {
134152
.of('plink_simulated_ldms1.part_1_1\\nplink_simulated_ldms2.part_1_1')
135153
.collectFile(name:'plink_simulated_ldms.mgrm', newLine: true)
136154

137-
ldms_grm_files = GCTA_MAKEGRM_LDMS1.out.grm_files
138-
.mix(GCTA_MAKEGRM_LDMS2.out.grm_files)
139-
.map { meta, grm_id, grm_bin, grm_n_bin -> [grm_id, grm_bin, grm_n_bin] }
155+
ldms_grm_files = GCTA_MAKEGRMPART_LDMS1.out.grm_files
156+
.mix(GCTA_MAKEGRMPART_LDMS2.out.grm_files)
157+
.map { meta, grm_files, nparts_gcta, part_gcta_job -> grm_files }
140158
.collect()
141159
.map { rows -> rows.flatten() }
142160

143-
input[0] = GAWK_BIVARIATE_PHENO.out.output
161+
input[0] = GAWK_BIVARIATE_PHENO.out.output.map { meta, phenotype_file -> [meta, phenotype_file, [], []] }
144162
input[1] = mgrm_file
145163
.combine(ldms_grm_files)
146164
.map { row -> [[ id:'plink_simulated_ldms' ], row[0], row[1..-1]] }
@@ -187,6 +205,53 @@ nextflow_process {
187205
}
188206
}
189207

208+
test("homo_sapiens popgen - binary bivariate phenotype with ldms mgrm and prevalence") {
209+
config "./nextflow.config"
210+
when {
211+
process {
212+
"""
213+
mgrm_file = Channel
214+
.of('plink_simulated_ldms1.part_1_1\\nplink_simulated_ldms2.part_1_1')
215+
.collectFile(name:'plink_simulated_ldms.mgrm', newLine: true)
216+
217+
ldms_grm_files = GCTA_MAKEGRMPART_LDMS1.out.grm_files
218+
.mix(GCTA_MAKEGRMPART_LDMS2.out.grm_files)
219+
.map { meta, grm_files, nparts_gcta, part_gcta_job -> grm_files }
220+
.collect()
221+
.map { rows -> rows.flatten() }
222+
223+
input[0] = GAWK_BIVARIATE_BINARY_PHENO.out.output.map { meta, phenotype_file -> [meta, phenotype_file, '1 2', '0.30 0.25'] }
224+
input[1] = mgrm_file
225+
.combine(ldms_grm_files)
226+
.map { row -> [[ id:'plink_simulated_ldms' ], row[0], row[1..-1]] }
227+
input[2] = [[ id:'covariates_quant' ], []]
228+
input[3] = [[ id:'covariates_cat' ], []]
229+
"""
230+
}
231+
}
232+
233+
then {
234+
assertAll(
235+
{ assert process.success },
236+
{ assert process.out.bivariate_results.size() == 1 },
237+
{ assert process.out.log_file.size() == 1 },
238+
{ assert process.out.bivariate_results.get(0).get(0).id == "BinaryTrait1__BinaryTrait2" },
239+
{ assert file(process.out.bivariate_results.get(0).get(1)).name == "BinaryTrait1__BinaryTrait2.hsq" },
240+
{ assert file(process.out.log_file.get(0).get(1)).name == "BinaryTrait1__BinaryTrait2.log" },
241+
{
242+
def logText = file(process.out.log_file.get(0).get(1)).text
243+
assert logText.contains("--reml-bivar-prevalence 0.3 0.25")
244+
},
245+
{
246+
assert snapshot(
247+
process.out.bivariate_results,
248+
process.out.findAll { key, val -> key.startsWith('versions') }
249+
).match()
250+
}
251+
)
252+
}
253+
}
254+
190255
test("homo_sapiens popgen - bivariate phenotype fails when mgrm references missing GRM basename") {
191256
config "./nextflow.config"
192257
when {
@@ -196,13 +261,13 @@ nextflow_process {
196261
.of('plink_simulated_ldms_missing.part_1_1')
197262
.collectFile(name:'plink_simulated_ldms_broken.mgrm', newLine: true)
198263

199-
ldms_grm_files = GCTA_MAKEGRM_LDMS1.out.grm_files
200-
.mix(GCTA_MAKEGRM_LDMS2.out.grm_files)
201-
.map { meta, grm_id, grm_bin, grm_n_bin -> [grm_id, grm_bin, grm_n_bin] }
264+
ldms_grm_files = GCTA_MAKEGRMPART_LDMS1.out.grm_files
265+
.mix(GCTA_MAKEGRMPART_LDMS2.out.grm_files)
266+
.map { meta, grm_files, nparts_gcta, part_gcta_job -> grm_files }
202267
.collect()
203268
.map { rows -> rows.flatten() }
204269

205-
input[0] = GAWK_BIVARIATE_PHENO.out.output
270+
input[0] = GAWK_BIVARIATE_PHENO.out.output.map { meta, phenotype_file -> [meta, phenotype_file, [], []] }
206271
input[1] = broken_mgrm_file
207272
.combine(ldms_grm_files)
208273
.map { row -> [[ id:'plink_simulated_ldms' ], row[0], row[1..-1]] }
@@ -231,13 +296,13 @@ nextflow_process {
231296
.of('plink_simulated_ldms1.part_1_1\\nplink_simulated_ldms2.part_1_1')
232297
.collectFile(name:'plink_simulated_ldms.mgrm', newLine: true)
233298

234-
ldms_grm_files = GCTA_MAKEGRM_LDMS1.out.grm_files
235-
.mix(GCTA_MAKEGRM_LDMS2.out.grm_files)
236-
.map { meta, grm_id, grm_bin, grm_n_bin -> [grm_id, grm_bin, grm_n_bin] }
299+
ldms_grm_files = GCTA_MAKEGRMPART_LDMS1.out.grm_files
300+
.mix(GCTA_MAKEGRMPART_LDMS2.out.grm_files)
301+
.map { meta, grm_files, nparts_gcta, part_gcta_job -> grm_files }
237302
.collect()
238303
.map { rows -> rows.flatten() }
239304

240-
input[0] = GAWK_BIVARIATE_PHENO.out.output
305+
input[0] = GAWK_BIVARIATE_PHENO.out.output.map { meta, phenotype_file -> [meta, phenotype_file, [], []] }
241306
input[1] = mgrm_file
242307
.combine(ldms_grm_files)
243308
.map { row -> [[ id:'plink_simulated_ldms' ], row[0], row[1..-1]] }

modules/nf-core/gcta/bivariateremlldms/tests/main.nf.test.snap

Lines changed: 27 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -177,5 +177,31 @@
177177
"nextflow": "25.10.4"
178178
},
179179
"timestamp": "2026-03-21T00:39:38.748257872"
180+
},
181+
"homo_sapiens popgen - binary bivariate phenotype with ldms mgrm and prevalence": {
182+
"content": [
183+
[
184+
[
185+
{
186+
"id": "BinaryTrait1__BinaryTrait2"
187+
},
188+
"BinaryTrait1__BinaryTrait2.hsq:md5,56f5b427deec963764e25a9acad76b80"
189+
]
190+
],
191+
{
192+
"versions_gcta": [
193+
[
194+
"GCTA_BIVARIATEREMLLDMS",
195+
"gcta",
196+
"1.94.1"
197+
]
198+
]
199+
}
200+
],
201+
"meta": {
202+
"nf-test": "0.9.3",
203+
"nextflow": "25.10.4"
204+
},
205+
"timestamp": "2026-05-13T16:07:21.380446323"
180206
}
181-
}
207+
}

modules/nf-core/gcta/makegrmpart/main.nf

Lines changed: 15 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -1,25 +1,25 @@
11
process GCTA_MAKEGRMPART {
2-
tag "part ${meta.part_gcta_job} of ${meta.nparts_gcta} (${meta.id})"
2+
tag "${meta.id}: part ${part_gcta_job} of ${nparts_gcta}"
33
label 'process_medium'
44
conda "${moduleDir}/environment.yml"
5-
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
6-
'docker://community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' :
7-
'community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' }"
5+
container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
6+
? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/46/46b0d05f0daa47561d87d2a9cac5e51edc2c78e26f1bbab439c688386241a274/data'
7+
: 'community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9'}"
88

99
input:
10-
tuple val(meta), path(mfile), path(bed_pgen), path(bim_pvar), path(fam_psam)
10+
tuple val(meta), val(nparts_gcta), val(part_gcta_job), path(mfile), path(bed_pgen), path(bim_pvar), path(fam_psam)
1111
tuple val(meta2), path(snp_group_file)
1212

1313
output:
14-
tuple val(meta), path("*.part_${meta.nparts_gcta}_${meta.part_gcta_job}.grm.id"), path("*.part_${meta.nparts_gcta}_${meta.part_gcta_job}.grm.bin"), path("*.part_${meta.nparts_gcta}_${meta.part_gcta_job}.grm.N.bin"), emit: grm_files
15-
tuple val("${task.process}"), val("gcta"), eval("gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//'"), emit: versions_gcta, topic: versions
14+
tuple val(meta), path("*.part_${nparts}_${part}.grm.*"), val(nparts_gcta), val(part_gcta_job), emit: grm_files
15+
tuple val("${task.process}"), val("gcta"), eval("gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'"), emit: versions_gcta, topic: versions
1616

1717
when:
1818
task.ext.when == null || task.ext.when
1919

2020
script:
21-
def part_gcta_job = meta.part_gcta_job
22-
def nparts_gcta = meta.nparts_gcta
21+
nparts = nparts_gcta ?: 1
22+
part = part_gcta_job ?: 1
2323
def extract_cmd = snp_group_file ? "--extract ${snp_group_file}" : ''
2424
def extra_args = task.ext.args ?: ''
2525
def prefix = task.ext.prefix ?: "${meta.id}"
@@ -28,21 +28,21 @@ process GCTA_MAKEGRMPART {
2828
def multi_file_flag = genotype_extension == 'pgen' ? '--mpfile' : '--mbfile'
2929

3030
"""
31-
3231
gcta \\
3332
${multi_file_flag} ${mfile} \\
34-
--make-grm-part ${nparts_gcta} ${part_gcta_job} \\
33+
--make-grm-part ${nparts} ${part} \\
3534
${extract_cmd} \\
36-
--maf 0.01 \\
3735
--thread-num ${task.cpus} \\
3836
--out ${prefix} ${extra_args}
3937
"""
4038

4139
stub:
40+
nparts = nparts_gcta ?: 1
41+
part = part_gcta_job ?: 1
4242
def prefix = task.ext.prefix ?: "${meta.id}"
4343
"""
44-
touch ${prefix}.part_${meta.nparts_gcta}_${meta.part_gcta_job}.grm.id
45-
touch ${prefix}.part_${meta.nparts_gcta}_${meta.part_gcta_job}.grm.bin
46-
touch ${prefix}.part_${meta.nparts_gcta}_${meta.part_gcta_job}.grm.N.bin
44+
touch ${prefix}.part_${nparts}_${part}.grm.id
45+
touch ${prefix}.part_${nparts}_${part}.grm.bin
46+
touch ${prefix}.part_${nparts}_${part}.grm.N.bin
4747
"""
4848
}

0 commit comments

Comments
 (0)