Skip to content

Commit 9198ed5

Browse files
committed
Normalize gcta/remlldms LDMS setup
1 parent a535047 commit 9198ed5

4 files changed

Lines changed: 135 additions & 50 deletions

File tree

modules/nf-core/gcta/remlldms/main.nf

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,28 +2,28 @@ process GCTA_REMLLDMS {
22
tag "gcta_reml_ldms_${meta.id}_${meta2.id}"
33
label 'process_medium'
44
conda "${moduleDir}/environment.yml"
5-
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
6-
'docker://community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' :
7-
'community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' }"
5+
container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
6+
? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/46/46b0d05f0daa47561d87d2a9cac5e51edc2c78e26f1bbab439c688386241a274/data'
7+
: 'community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9'}"
88

99
input:
10-
tuple val(meta), path(phenotypes_file)
10+
tuple val(meta), path(phenotypes_file), val(mpheno), val(prevalence)
1111
tuple val(meta2), path(mgrm_file), path(grm_files)
1212
tuple val(meta3), path(quant_covariates_file)
1313
tuple val(meta4), path(cat_covariates_file)
14-
val(mpheno)
1514

1615
output:
1716
tuple val(meta), path("*.hsq"), emit: reml_results
18-
tuple val("${task.process}"), val("gcta"), eval("gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//'"), emit: versions_gcta, topic: versions
17+
tuple val("${task.process}"), val("gcta"), eval("gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'"), emit: versions_gcta, topic: versions
1918

2019
when:
2120
task.ext.when == null || task.ext.when
2221

2322
script:
2423
def prefix = task.ext.prefix ?: "${meta.id}"
25-
def mpheno_value = (mpheno == null || (mpheno instanceof Collection && mpheno.isEmpty())) ? 1 : mpheno
24+
def mpheno_value = mpheno ?: 1
2625
def mpheno_param = "--mpheno ${mpheno_value}"
26+
def prevalence_param = prevalence ? "--prevalence ${prevalence}" : ''
2727
def qcovar_param = quant_covariates_file ? "--qcovar ${quant_covariates_file}" : ''
2828
def covar_param = cat_covariates_file ? "--covar ${cat_covariates_file}" : ''
2929
def extra_args = task.ext.args ?: ''
@@ -32,10 +32,12 @@ process GCTA_REMLLDMS {
3232
set -euo pipefail
3333
3434
gcta \\
35+
--reml \\
3536
--reml-no-constrain \\
3637
--mgrm ${mgrm_file} \\
3738
--pheno ${phenotypes_file} \\
3839
${mpheno_param} \\
40+
${prevalence_param} \\
3941
${qcovar_param} \\
4042
${covar_param} \\
4143
--out "${prefix}" \\

modules/nf-core/gcta/remlldms/meta.yml

Lines changed: 24 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,22 @@ name: "gcta_remlldms"
33
description: Run REML-LDMS heritability estimation with an MGRM manifest
44
keywords:
55
- gcta
6+
- genome-wide complex trait analysis
67
- reml
8+
- restricted maximum likelihood
79
- ldms
10+
- linkage disequilibrium and minor allele frequency stratification
11+
- grm
12+
- genetic relationship matrix
813
- genetics
914
tools:
1015
- "gcta":
1116
description: "Genome-wide Complex Trait Analysis (GCTA) estimates genetic relationships, variance components, and association statistics from genome-wide data."
1217
homepage: "https://yanglab.westlake.edu.cn/software/gcta/"
1318
documentation: "https://yanglab.westlake.edu.cn/software/gcta/static/gcta_doc_latest.pdf"
1419
tool_dev_url: "https://yanglab.westlake.edu.cn/software/gcta/"
20+
licence: ["GPL-3.0-only"]
21+
identifier: "biotools:gcta"
1522
input:
1623
- - meta:
1724
type: map
@@ -24,6 +31,17 @@ input:
2431
pattern: "*.{phe,pheno,txt,tsv}"
2532
ontologies:
2633
- edam: "http://edamontology.org/format_3475"
34+
- mpheno:
35+
type: integer
36+
description: |
37+
1-based index of the trait to analyse among the phenotype columns, passed to `--mpheno`.
38+
Pass `[]` or `null` to use the default first phenotype column
39+
(`--mpheno 1`).
40+
- prevalence:
41+
type: float
42+
description: |
43+
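Population prevalence of the trait (a proportion between 0 and 1, e.g. `0.1`) passed to `--prevalence` for case-control traits, so that GCTA also reports the estimate on the liability scale.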
44+
Pass `[]` or `null` for quantitative traits.
2745
- - meta2:
2846
type: map
2947
description: |
@@ -37,8 +55,8 @@ input:
3755
- edam: "http://edamontology.org/format_2330"
3856
- grm_files:
3957
type: file
40-
description: GRM sidecar files referenced by `mgrm_file`
41-
pattern: "*"
58+
description: GRM files (`.grm.id`, `.grm.bin`, `.grm.N.bin`) for every GRM prefix listed in `mgrm_file`
59+
pattern: "*.grm.*"
4260
ontologies: []
4361
- - meta3:
4462
type: map
@@ -62,11 +80,6 @@ input:
6280
pattern: "*.{covar,cov,txt,tsv}"
6381
ontologies:
6482
- edam: "http://edamontology.org/format_3475"
65-
- mpheno:
66-
type: integer
67-
description: |
68-
Phenotype column selector passed to `--mpheno`.
69-
Pass `1` explicitly for the default first phenotype column.
7083
output:
7184
reml_results:
7285
- - meta:
@@ -87,7 +100,7 @@ output:
87100
- "gcta":
88101
type: string
89102
description: The tool name
90-
- "gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//'":
103+
- "gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'":
91104
type: eval
92105
description: The command used to retrieve the GCTA version
93106
topics:
@@ -98,10 +111,10 @@ topics:
98111
- gcta:
99112
type: string
100113
description: The tool name
101-
- gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//':
114+
- "gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'":
102115
type: eval
103116
description: The command used to retrieve the GCTA version
104117
authors:
105-
- "@andongni"
118+
- "@lyh970817"
106119
maintainers:
107-
- "@andongni"
120+
- "@lyh970817"

modules/nf-core/gcta/remlldms/tests/main.nf.test

Lines changed: 76 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,20 @@ nextflow_process {
4040
}
4141
}
4242

43+
run("GAWK", alias: "GAWK_BINARY_PHENOTYPE") {
44+
script "../../../gawk/main.nf"
45+
process {
46+
"""
47+
input[0] = [
48+
[ id:'BinaryTrait' ],
49+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_binary_phenoname.phe', checkIfExists: true)
50+
]
51+
input[1] = Channel.of('FNR == 1 { next } { print \$1, \$2, \$3 + 1 }').collectFile(name:'binary_phenotypes.awk')
52+
input[2] = false
53+
"""
54+
}
55+
}
56+
4357
run("GAWK", alias: "GAWK_QUANTITATIVE_COVARIATES") {
4458
script "../../../gawk/main.nf"
4559
process {
@@ -68,14 +82,16 @@ nextflow_process {
6882
}
6983
}
7084

71-
run("GCTA_MAKEGRMPART", alias: "GCTA_MAKEGRM_LDMS1") {
85+
run("GCTA_MAKEGRMPART", alias: "GCTA_MAKEGRMPART_LDMS1") {
7286
script "../../makegrmpart/main.nf"
7387
process {
7488
"""
7589
file('plink_simulated.mbfile').text = 'plink_simulated\\n'
7690

7791
input[0] = [
78-
[ id:'plink_simulated_ldms1', part_gcta_job:1, nparts_gcta:1 ],
92+
[ id:'plink_simulated_ldms1' ],
93+
1,
94+
1,
7995
file('plink_simulated.mbfile'),
8096
[
8197
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true)
@@ -98,17 +114,16 @@ nextflow_process {
98114
when {
99115
process {
100116
"""
101-
input[0] = GAWK_MULTI_PHENOTYPES.out.output.map { meta, pheno -> [[ id:'QuantitativeTraitMpheno2' ], pheno] }
117+
input[0] = GAWK_MULTI_PHENOTYPES.out.output.map { meta, pheno -> [[ id:'QuantitativeTraitMpheno2' ], pheno, 2, []] }
102118
input[1] = Channel
103119
.of('plink_simulated_ldms1.part_1_1')
104120
.collectFile(name:'plink_simulated_ldms.mgrm', newLine: true)
105-
.combine(GCTA_MAKEGRM_LDMS1.out.grm_files)
106-
.map { mgrm_file, meta, grm_id, grm_bin, grm_n_bin ->
107-
[[ id:'plink_simulated_ldms' ], mgrm_file, [grm_id, grm_bin, grm_n_bin]]
121+
.combine(GCTA_MAKEGRMPART_LDMS1.out.grm_files)
122+
.map { mgrm_file, meta, grm_files, nparts_gcta, part_gcta_job ->
123+
[[ id:'plink_simulated_ldms' ], mgrm_file, grm_files]
108124
}
109125
input[2] = GAWK_QUANTITATIVE_COVARIATES.out.output
110126
input[3] = GAWK_CATEGORICAL_COVARIATES.out.output
111-
input[4] = 2
112127
"""
113128
}
114129
}
@@ -133,17 +148,16 @@ nextflow_process {
133148
when {
134149
process {
135150
"""
136-
input[0] = GAWK_QUANTITATIVE_PHENOTYPE.out.output
151+
input[0] = GAWK_QUANTITATIVE_PHENOTYPE.out.output.map { meta, pheno -> [meta, pheno, 1, []] }
137152
input[1] = Channel
138153
.of('plink_simulated_ldms1.part_1_1')
139154
.collectFile(name:'plink_simulated_ldms.mgrm', newLine: true)
140-
.combine(GCTA_MAKEGRM_LDMS1.out.grm_files)
141-
.map { mgrm_file, meta, grm_id, grm_bin, grm_n_bin ->
142-
[[ id:'plink_simulated_ldms' ], mgrm_file, [grm_id, grm_bin, grm_n_bin]]
155+
.combine(GCTA_MAKEGRMPART_LDMS1.out.grm_files)
156+
.map { mgrm_file, meta, grm_files, nparts_gcta, part_gcta_job ->
157+
[[ id:'plink_simulated_ldms' ], mgrm_file, grm_files]
143158
}
144159
input[2] = GAWK_QUANTITATIVE_COVARIATES.out.output
145160
input[3] = GAWK_CATEGORICAL_COVARIATES.out.output
146-
input[4] = 1
147161
"""
148162
}
149163
}
@@ -168,17 +182,16 @@ nextflow_process {
168182
when {
169183
process {
170184
"""
171-
input[0] = GAWK_QUANTITATIVE_PHENOTYPE.out.output
185+
input[0] = GAWK_QUANTITATIVE_PHENOTYPE.out.output.map { meta, pheno -> [meta, pheno, 1, null] }
172186
input[1] = Channel
173187
.of('plink_simulated_ldms1.part_1_1')
174188
.collectFile(name:'plink_simulated_ldms.mgrm', newLine: true)
175-
.combine(GCTA_MAKEGRM_LDMS1.out.grm_files)
176-
.map { mgrm_file, meta, grm_id, grm_bin, grm_n_bin ->
177-
[[ id:'plink_simulated_ldms' ], mgrm_file, [grm_id, grm_bin, grm_n_bin]]
189+
.combine(GCTA_MAKEGRMPART_LDMS1.out.grm_files)
190+
.map { mgrm_file, meta, grm_files, nparts_gcta, part_gcta_job ->
191+
[[ id:'plink_simulated_ldms' ], mgrm_file, grm_files]
178192
}
179193
input[2] = [[ id:'covariates_quant' ], []]
180194
input[3] = [[ id:'covariates_cat' ], []]
181-
input[4] = 1
182195
"""
183196
}
184197
}
@@ -198,22 +211,55 @@ nextflow_process {
198211
}
199212
}
200213

214+
test("homo_sapiens gsmr - binary phenotype with ldms mgrm and prevalence") {
215+
config "./nextflow.config"
216+
when {
217+
process {
218+
"""
219+
input[0] = GAWK_BINARY_PHENOTYPE.out.output.map { meta, pheno -> [[ id:'BinaryTraitPrevalence' ], pheno, 1, 0.1] }
220+
input[1] = Channel
221+
.of('plink_simulated_ldms1.part_1_1')
222+
.collectFile(name:'plink_simulated_ldms.mgrm', newLine: true)
223+
.combine(GCTA_MAKEGRMPART_LDMS1.out.grm_files)
224+
.map { mgrm_file, meta, grm_files, nparts_gcta, part_gcta_job ->
225+
[[ id:'plink_simulated_ldms' ], mgrm_file, grm_files]
226+
}
227+
input[2] = [[ id:'covariates_quant' ], []]
228+
input[3] = [[ id:'covariates_cat' ], []]
229+
"""
230+
}
231+
}
232+
233+
then {
234+
assertAll(
235+
{ assert process.success },
236+
{ assert process.out.reml_results.size() == 1 },
237+
{ assert process.out.reml_results.get(0).get(0).id == "BinaryTraitPrevalence" },
238+
{
239+
assert snapshot(
240+
process.out.reml_results,
241+
process.out.findAll { key, val -> key.startsWith('versions') }
242+
).match()
243+
}
244+
)
245+
}
246+
}
247+
201248
test("homo_sapiens gsmr - ldms mgrm mpheno defaults to first phenotype when empty") {
202249
config "./nextflow.config"
203250
when {
204251
process {
205252
"""
206-
input[0] = GAWK_MULTI_PHENOTYPES.out.output.map { meta, pheno -> [[ id:'QuantitativeTraitMphenoDefault' ], pheno] }
253+
input[0] = GAWK_MULTI_PHENOTYPES.out.output.map { meta, pheno -> [[ id:'QuantitativeTraitMphenoDefault' ], pheno, [], []] }
207254
input[1] = Channel
208255
.of('plink_simulated_ldms1.part_1_1')
209256
.collectFile(name:'plink_simulated_ldms.mgrm', newLine: true)
210-
.combine(GCTA_MAKEGRM_LDMS1.out.grm_files)
211-
.map { mgrm_file, meta, grm_id, grm_bin, grm_n_bin ->
212-
[[ id:'plink_simulated_ldms' ], mgrm_file, [grm_id, grm_bin, grm_n_bin]]
257+
.combine(GCTA_MAKEGRMPART_LDMS1.out.grm_files)
258+
.map { mgrm_file, meta, grm_files, nparts_gcta, part_gcta_job ->
259+
[[ id:'plink_simulated_ldms' ], mgrm_file, grm_files]
213260
}
214261
input[2] = GAWK_QUANTITATIVE_COVARIATES.out.output
215262
input[3] = GAWK_CATEGORICAL_COVARIATES.out.output
216-
input[4] = []
217263
"""
218264
}
219265
}
@@ -238,17 +284,16 @@ nextflow_process {
238284
when {
239285
process {
240286
"""
241-
input[0] = GAWK_QUANTITATIVE_PHENOTYPE.out.output.map { meta, pheno -> [[ id:'QuantitativeTraitInvalidLdms' ], pheno] }
287+
input[0] = GAWK_QUANTITATIVE_PHENOTYPE.out.output.map { meta, pheno -> [[ id:'QuantitativeTraitInvalidLdms' ], pheno, 1, []] }
242288
input[1] = Channel
243289
.of('plink_simulated_ldms_missing.part_1_1')
244290
.collectFile(name:'plink_simulated_ldms_broken.mgrm', newLine: true)
245-
.combine(GCTA_MAKEGRM_LDMS1.out.grm_files)
246-
.map { mgrm_file, meta, grm_id, grm_bin, grm_n_bin ->
247-
[[ id:'plink_simulated_ldms' ], mgrm_file, [grm_id, grm_bin, grm_n_bin]]
291+
.combine(GCTA_MAKEGRMPART_LDMS1.out.grm_files)
292+
.map { mgrm_file, meta, grm_files, nparts_gcta, part_gcta_job ->
293+
[[ id:'plink_simulated_ldms' ], mgrm_file, grm_files]
248294
}
249295
input[2] = GAWK_QUANTITATIVE_COVARIATES.out.output
250296
input[3] = GAWK_CATEGORICAL_COVARIATES.out.output
251-
input[4] = 1
252297
"""
253298
}
254299
}
@@ -268,17 +313,16 @@ nextflow_process {
268313
when {
269314
process {
270315
"""
271-
input[0] = GAWK_QUANTITATIVE_PHENOTYPE.out.output
316+
input[0] = GAWK_QUANTITATIVE_PHENOTYPE.out.output.map { meta, pheno -> [meta, pheno, 1, []] }
272317
input[1] = Channel
273318
.of('plink_simulated_ldms1.part_1_1')
274319
.collectFile(name:'plink_simulated_ldms.mgrm', newLine: true)
275-
.combine(GCTA_MAKEGRM_LDMS1.out.grm_files)
276-
.map { mgrm_file, meta, grm_id, grm_bin, grm_n_bin ->
277-
[[ id:'plink_simulated_ldms' ], mgrm_file, [grm_id, grm_bin, grm_n_bin]]
320+
.combine(GCTA_MAKEGRMPART_LDMS1.out.grm_files)
321+
.map { mgrm_file, meta, grm_files, nparts_gcta, part_gcta_job ->
322+
[[ id:'plink_simulated_ldms' ], mgrm_file, grm_files]
278323
}
279324
input[2] = [[ id:'covariates_quant' ], []]
280325
input[3] = [[ id:'covariates_cat' ], []]
281-
input[4] = 1
282326
"""
283327
}
284328
}

modules/nf-core/gcta/remlldms/tests/main.nf.test.snap

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,5 +128,31 @@
128128
"nextflow": "25.10.4"
129129
},
130130
"timestamp": "2026-03-21T18:45:48.772452882"
131+
},
132+
"homo_sapiens gsmr - binary phenotype with ldms mgrm and prevalence": {
133+
"content": [
134+
[
135+
[
136+
{
137+
"id": "BinaryTraitPrevalence"
138+
},
139+
"BinaryTraitPrevalence.hsq:md5,7abad219d974e43713fc65d61f62fd30"
140+
]
141+
],
142+
{
143+
"versions_gcta": [
144+
[
145+
"GCTA_REMLLDMS",
146+
"gcta",
147+
"1.94.1"
148+
]
149+
]
150+
}
151+
],
152+
"meta": {
153+
"nf-test": "0.9.3",
154+
"nextflow": "25.10.4"
155+
},
156+
"timestamp": "2026-05-13T16:08:04.492576073"
131157
}
132158
}

0 commit comments

Comments
 (0)