Skip to content

Commit c3226e0

Browse files
committed
Normalize gcta/addgrms GRM inputs
1 parent cb9949e commit c3226e0

4 files changed

Lines changed: 70 additions & 76 deletions

File tree

modules/nf-core/gcta/addgrms/main.nf

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,31 +2,32 @@ process GCTA_ADDGRMS {
22
tag "${meta.id}"
33
label 'process_medium'
44
conda "${moduleDir}/environment.yml"
5-
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
6-
'docker://community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' :
7-
'community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' }"
5+
container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
6+
? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/46/46b0d05f0daa47561d87d2a9cac5e51edc2c78e26f1bbab439c688386241a274/data'
7+
: 'community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9'}"
88

99
input:
1010
tuple val(meta), path(mgrm_file), path(grm_files)
1111

1212
output:
13-
tuple val(meta), path("*.grm.id"), path("*.grm.bin"), path("*.grm.N.bin"), emit: combined_grm
14-
tuple val("${task.process}"), val("gcta"), eval("gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//'"), emit: versions_gcta, topic: versions
13+
tuple val(meta), path("*.grm.*"), emit: combined_grm
14+
tuple val("${task.process}"), val("gcta"), eval("gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'"), emit: versions_gcta, topic: versions
1515

1616
when:
1717
task.ext.when == null || task.ext.when
1818

1919
script:
20-
def args = task.ext.args ?: ''
20+
def extra_args = task.ext.args ?: ''
2121
def prefix = task.ext.prefix ?: "${meta.id}"
2222

2323
"""
24+
2425
gcta \\
2526
--mgrm ${mgrm_file} \\
2627
--make-grm \\
2728
--out ${prefix} \\
2829
--thread-num ${task.cpus} \\
29-
${args}
30+
${extra_args}
3031
"""
3132

3233
stub:

modules/nf-core/gcta/addgrms/meta.yml

Lines changed: 10 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,17 @@ name: "gcta_addgrms"
33
description: Combine multiple GRMs listed in an MGRM manifest into a single dense GRM
44
keywords:
55
- gcta
6+
- genome-wide complex trait analysis
67
- grm
8+
- genetic relationship matrix
79
- genetics
810
tools:
911
- "gcta":
1012
description: "Genome-wide Complex Trait Analysis (GCTA) estimates genetic relationships, variance components, and association statistics from genome-wide data."
1113
homepage: "https://yanglab.westlake.edu.cn/software/gcta/"
1214
documentation: "https://yanglab.westlake.edu.cn/software/gcta/static/gcta_doc_latest.pdf"
1315
tool_dev_url: "https://yanglab.westlake.edu.cn/software/gcta/"
16+
licence: ["GPL-3.0-only"]
1417
identifier: "biotools:gcta"
1518

1619
input:
@@ -38,20 +41,10 @@ output:
3841
description: |
3942
Groovy map containing combined GRM metadata
4043
e.g. `[ id:'plink_simulated' ]`
41-
- "*.grm.id":
44+
- "*.grm.*":
4245
type: file
43-
description: Combined GRM sample identifier file
44-
pattern: "*.grm.id"
45-
ontologies: []
46-
- "*.grm.bin":
47-
type: file
48-
description: Combined GRM binary matrix file
49-
pattern: "*.grm.bin"
50-
ontologies: []
51-
- "*.grm.N.bin":
52-
type: file
53-
description: Combined GRM sample-count matrix file
54-
pattern: "*.grm.N.bin"
46+
description: Combined dense GRM sidecar files
47+
pattern: "*.grm.{id,bin,N.bin}"
5548
ontologies: []
5649
versions_gcta:
5750
- - "${task.process}":
@@ -60,7 +53,7 @@ output:
6053
- "gcta":
6154
type: string
6255
description: The tool name
63-
- "gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//'":
56+
- "gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'":
6457
type: eval
6558
description: The command used to retrieve the GCTA version
6659

@@ -72,11 +65,11 @@ topics:
7265
- gcta:
7366
type: string
7467
description: The tool name
75-
- gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//':
68+
- "gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'":
7669
type: eval
7770
description: The command used to retrieve the GCTA version
7871

7972
authors:
80-
- "@andongni"
73+
- "@lyh970817"
8174
maintainers:
82-
- "@andongni"
75+
- "@lyh970817"

modules/nf-core/gcta/addgrms/tests/main.nf.test

Lines changed: 40 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -9,57 +9,37 @@ nextflow_process {
99
tag "gcta"
1010
tag "gcta/addgrms"
1111
tag "gcta/makegrmpart"
12-
tag "gawk"
1312

1413
setup {
15-
run("GAWK", alias: "GAWK_VARIANTS_LDMS1") {
16-
script "../../../gawk/main.nf"
17-
process {
18-
"""
19-
input[0] = [
20-
[ id:'plink_simulated_ldms1_variants' ],
21-
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true)
22-
]
23-
input[1] = Channel.of('NR <= 110 { print \$2 }').collectFile(name:'ldms1_variants.awk')
24-
input[2] = false
25-
"""
26-
}
27-
}
28-
29-
run("GAWK", alias: "GAWK_VARIANTS_LDMS2") {
30-
script "../../../gawk/main.nf"
31-
process {
32-
"""
33-
input[0] = [
34-
[ id:'plink_simulated_ldms2_variants' ],
35-
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true)
36-
]
37-
input[1] = Channel.of('NR > 110 { print \$2 }').collectFile(name:'ldms2_variants.awk')
38-
input[2] = false
39-
"""
40-
}
41-
}
42-
4314
run("GCTA_MAKEGRMPART", alias: "GCTA_MAKEGRMPART_LDMS1") {
4415
script "../../makegrmpart/main.nf"
4516
process {
4617
"""
47-
file('plink_simulated.mbfile').text = 'plink_simulated\\n'
18+
file('plink_simulated_ldms1.mbfile').text = 'plink_simulated\\n'
19+
20+
def bimFile = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true)
21+
def extractSnps = bimFile.readLines()
22+
.take(10)
23+
.collect { row -> row.trim().split(/\\s+/)[1] }
24+
.join('\\n') + '\\n'
25+
file('plink_simulated_ldms1.snps.txt').text = extractSnps
4826

4927
input[0] = [
50-
[ id:'plink_simulated_ldms1', part_gcta_job:1, nparts_gcta:1 ],
51-
file('plink_simulated.mbfile'),
28+
[ id:'plink_simulated_ldms1' ],
29+
1,
30+
1,
31+
file('plink_simulated_ldms1.mbfile'),
5232
[
5333
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true)
5434
],
5535
[
56-
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true)
36+
bimFile
5737
],
5838
[
5939
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true)
6040
]
6141
]
62-
input[1] = GAWK_VARIANTS_LDMS1.out.output
42+
input[1] = [[ id:'plink_simulated_ldms1' ], file('plink_simulated_ldms1.snps.txt')]
6343
"""
6444
}
6545
}
@@ -68,22 +48,32 @@ nextflow_process {
6848
script "../../makegrmpart/main.nf"
6949
process {
7050
"""
71-
file('plink_simulated.mbfile').text = 'plink_simulated\\n'
51+
file('plink_simulated_ldms2.mbfile').text = 'plink_simulated\\n'
52+
53+
def bimFile = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true)
54+
def extractSnps = bimFile.readLines()
55+
.drop(10)
56+
.take(10)
57+
.collect { row -> row.trim().split(/\\s+/)[1] }
58+
.join('\\n') + '\\n'
59+
file('plink_simulated_ldms2.snps.txt').text = extractSnps
7260

7361
input[0] = [
74-
[ id:'plink_simulated_ldms2', part_gcta_job:1, nparts_gcta:1 ],
75-
file('plink_simulated.mbfile'),
62+
[ id:'plink_simulated_ldms2' ],
63+
1,
64+
1,
65+
file('plink_simulated_ldms2.mbfile'),
7666
[
7767
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true)
7868
],
7969
[
80-
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true)
70+
bimFile
8171
],
8272
[
8373
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true)
8474
]
8575
]
86-
input[1] = GAWK_VARIANTS_LDMS2.out.output
76+
input[1] = [[ id:'plink_simulated_ldms2' ], file('plink_simulated_ldms2.snps.txt')]
8777
"""
8878
}
8979
}
@@ -101,9 +91,9 @@ nextflow_process {
10191

10292
grm_files = GCTA_MAKEGRMPART_LDMS1.out.grm_files
10393
.mix(GCTA_MAKEGRMPART_LDMS2.out.grm_files)
104-
.map { meta, grm_id, grm_bin, grm_n_bin -> [grm_id, grm_bin, grm_n_bin] }
94+
.map { meta, grm_files, nparts_gcta, part_gcta_job -> grm_files }
10595
.collect()
106-
.map { rows -> rows.sort { left, right -> left[0].name <=> right[0].name }.flatten() }
96+
.map { bundles -> bundles.flatten().sort { it.name } }
10797

10898
input[0] = mgrm_file
10999
.combine(grm_files)
@@ -117,7 +107,13 @@ nextflow_process {
117107
{ assert process.success },
118108
{ assert process.out.combined_grm.size() == 1 },
119109
{ assert process.out.combined_grm.get(0).get(0).id == "plink_simulated_ldms" },
120-
{ assert file(process.out.combined_grm.get(0).get(1)).name == "plink_simulated_ldms.grm.id" },
110+
{
111+
assert process.out.combined_grm.get(0).get(1).collect { file(it).name }.toSet() == [
112+
'plink_simulated_ldms.grm.id',
113+
'plink_simulated_ldms.grm.bin',
114+
'plink_simulated_ldms.grm.N.bin'
115+
] as Set
116+
},
121117
{
122118
assert snapshot(
123119
process.out.combined_grm,
@@ -141,9 +137,9 @@ nextflow_process {
141137

142138
grm_files = GCTA_MAKEGRMPART_LDMS1.out.grm_files
143139
.mix(GCTA_MAKEGRMPART_LDMS2.out.grm_files)
144-
.map { meta, grm_id, grm_bin, grm_n_bin -> [grm_id, grm_bin, grm_n_bin] }
140+
.map { meta, grm_files, nparts_gcta, part_gcta_job -> grm_files }
145141
.collect()
146-
.map { rows -> rows.sort { left, right -> left[0].name <=> right[0].name }.flatten() }
142+
.map { bundles -> bundles.flatten().sort { it.name } }
147143

148144
input[0] = mgrm_file
149145
.combine(grm_files)

modules/nf-core/gcta/addgrms/tests/main.nf.test.snap

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,11 @@
66
{
77
"id": "plink_simulated_ldms"
88
},
9-
"plink_simulated_ldms.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e",
10-
"plink_simulated_ldms.grm.bin:md5,d41d8cd98f00b204e9800998ecf8427e",
11-
"plink_simulated_ldms.grm.N.bin:md5,d41d8cd98f00b204e9800998ecf8427e"
9+
[
10+
"plink_simulated_ldms.grm.N.bin:md5,d41d8cd98f00b204e9800998ecf8427e",
11+
"plink_simulated_ldms.grm.bin:md5,d41d8cd98f00b204e9800998ecf8427e",
12+
"plink_simulated_ldms.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e"
13+
]
1214
]
1315
],
1416
{
@@ -25,7 +27,7 @@
2527
"nf-test": "0.9.3",
2628
"nextflow": "25.10.4"
2729
},
28-
"timestamp": "2026-03-21T00:42:02.641342626"
30+
"timestamp": "2026-05-15T21:10:46.231316108"
2931
},
3032
"homo_sapiens popgen - merge dense GRMs from mgrm": {
3133
"content": [
@@ -34,9 +36,11 @@
3436
{
3537
"id": "plink_simulated_ldms"
3638
},
37-
"plink_simulated_ldms.grm.id:md5,4f9aa36c44a417ff6d7caa9841e66ad9",
38-
"plink_simulated_ldms.grm.bin:md5,59a9d628e3fb4b9488244048c952b2ca",
39-
"plink_simulated_ldms.grm.N.bin:md5,acaa43bbbf2253d392537a178ecf09a4"
39+
[
40+
"plink_simulated_ldms.grm.N.bin:md5,804f8e1799c8b2d4d3df1b52a2a463c6",
41+
"plink_simulated_ldms.grm.bin:md5,850235911329bf9ab68f03e25bbc1ef1",
42+
"plink_simulated_ldms.grm.id:md5,4f9aa36c44a417ff6d7caa9841e66ad9"
43+
]
4044
]
4145
],
4246
{
@@ -53,6 +57,6 @@
5357
"nf-test": "0.9.3",
5458
"nextflow": "25.10.4"
5559
},
56-
"timestamp": "2026-03-21T00:41:50.805078215"
60+
"timestamp": "2026-05-15T22:52:43.953267272"
5761
}
5862
}

0 commit comments

Comments
 (0)