Skip to content

Commit 322f285

Browse files
authored
Add gcta/addgrms module (#10995)
* Add gcta/addgrms module
* Use apptainer-aware container syntax for gcta/addgrms
* Tidy gcta/addgrms script and test config
* Use sanitized snapshots for gcta/addgrms tests
1 parent 17896e4 commit 322f285

5 files changed

Lines changed: 338 additions & 0 deletions

File tree

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
---
2+
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
3+
# Channels this environment draws packages from; conda-forge is listed ahead of bioconda.
channels:
4+
- conda-forge
5+
- bioconda
6+
# Single dependency: gcta taken from the bioconda channel and pinned to 1.94.1,
# the same version that appears in the container tag (`gcta:1.94.1--...`) of the process.
dependencies:
7+
- bioconda::gcta=1.94.1
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
// Runs `gcta --mgrm <manifest> --make-grm` to merge the GRMs listed in an MGRM
// manifest and writes the result under the `${prefix}` basename.
process GCTA_ADDGRMS {
2+
tag "${meta.id}"
3+
label 'process_medium'
4+
conda "${moduleDir}/environment.yml"
5+
// Image selection: when the container engine is singularity or apptainer and
// task.ext.singularity_pull_docker_container is not set, the https blob URL is used;
// in every other case the community.wave.seqera.io image reference is used.
container "${workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container
6+
? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/46/46b0d05f0daa47561d87d2a9cac5e51edc2c78e26f1bbab439c688386241a274/data'
7+
: 'community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9'}"
8+
9+
// One tuple per task: the meta map, the MGRM manifest handed to --mgrm, and the GRM files.
// grm_files is never named on the command line below; presumably it is declared so that
// the files the manifest refers to are present in the task directory — confirm.
input:
10+
tuple val(meta), path(mgrm_file), path(grm_files)
11+
12+
// combined_grm: every file matching *.grm.* together with the meta map.
// versions_gcta: [process name, "gcta", version], where the version is cut out of the
// `gcta --version` line of the form "* version v<digits and dots>"; also sent to the `versions` topic.
output:
13+
tuple val(meta), path("*.grm.*"), emit: combined_grm
14+
tuple val("${task.process}"), val("gcta"), eval("gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'"), emit: versions_gcta, topic: versions
15+
16+
// The task runs when task.ext.when is null or evaluates to true.
when:
17+
task.ext.when == null || task.ext.when
18+
19+
// args: extra gcta options from task.ext.args (empty string when unset), appended last.
// prefix: output basename, task.ext.prefix if set, otherwise meta.id.
script:
20+
def args = task.ext.args ?: ''
21+
def prefix = task.ext.prefix ?: "${meta.id}"
22+
"""
23+
gcta \\
24+
--mgrm ${mgrm_file} \\
25+
--make-grm \\
26+
--out ${prefix} \\
27+
--thread-num ${task.cpus} \\
28+
${args}
29+
"""
30+
31+
// Stub run: gcta is not called; three empty placeholder files with the
// .grm.id, .grm.bin and .grm.N.bin suffixes are created instead.
stub:
32+
def prefix = task.ext.prefix ?: "${meta.id}"
33+
"""
34+
touch ${prefix}.grm.id
35+
touch ${prefix}.grm.bin
36+
touch ${prefix}.grm.N.bin
37+
"""
38+
}
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
2+
name: "gcta_addgrms"
3+
description: Combine multiple GRMs listed in an MGRM manifest into a single dense GRM
4+
keywords:
5+
- gcta
6+
- genome-wide complex trait analysis
7+
- grm
8+
- genetic relationship matrix
9+
- genetics
10+
# Upstream tool record. homepage and tool_dev_url carry the same URL;
# documentation points at a PDF on the same site.
tools:
11+
- "gcta":
12+
description: "Genome-wide Complex Trait Analysis (GCTA) estimates genetic relationships, variance components, and association statistics from genome-wide data."
13+
homepage: "https://yanglab.westlake.edu.cn/software/gcta/"
14+
documentation: "https://yanglab.westlake.edu.cn/software/gcta/static/gcta_doc_latest.pdf"
15+
tool_dev_url: "https://yanglab.westlake.edu.cn/software/gcta/"
16+
licence: ["GPL-3.0-only"]
17+
identifier: "biotools:gcta"
18+
19+
# One input channel carrying a three-element tuple, listed in the same order as the
# process input declaration: meta, mgrm_file, grm_files.
input:
20+
- - meta:
21+
type: map
22+
description: |
23+
Groovy map containing combined GRM metadata
24+
e.g. `[ id:'plink_simulated' ]`
25+
- mgrm_file:
26+
type: file
27+
description: MGRM manifest listing the GRM prefixes to combine
28+
pattern: "*.mgrm"
29+
ontologies:
30+
- edam: "http://edamontology.org/format_2330"
31+
# grm_files uses the catch-all pattern "*" and declares no ontology terms.
- grm_files:
32+
type: file
33+
description: GRM sidecar files referenced by `mgrm_file`
34+
pattern: "*"
35+
ontologies: []
36+
37+
# Two output channels, named after the `emit:` labels of the process:
# combined_grm and versions_gcta.
output:
38+
combined_grm:
39+
- - meta:
40+
type: map
41+
description: |
42+
Groovy map containing combined GRM metadata
43+
e.g. `[ id:'plink_simulated' ]`
44+
# The key repeats the process glob "*.grm.*"; the pattern below narrows it to the
# three suffixes id, bin and N.bin.
- "*.grm.*":
45+
type: file
46+
description: Combined dense GRM sidecar files
47+
pattern: "*.grm.{id,bin,N.bin}"
48+
ontologies: []
49+
# Keys of the three elements repeat the expressions used in the process output tuple.
versions_gcta:
50+
- - "${task.process}":
51+
type: string
52+
description: The process the version was collected from
53+
- "gcta":
54+
type: string
55+
description: The tool name
56+
- "gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'":
57+
type: eval
58+
description: The command used to retrieve the GCTA version
59+
60+
# Describes the tuple sent to the `versions` topic: the same three elements
# (process name, tool name, version command) as the versions_gcta output.
topics:
61+
versions:
62+
- - ${task.process}:
63+
type: string
64+
description: The process the version was collected from
65+
- gcta:
66+
type: string
67+
description: The tool name
68+
- "gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'":
69+
type: eval
70+
description: The command used to retrieve the GCTA version
71+
72+
# The same handle is listed as both author and maintainer.
authors:
73+
- "@lyh970817"
74+
maintainers:
75+
- "@lyh970817"
Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,156 @@
1+
nextflow_process {
2+
3+
// Test suite for the GCTA_ADDGRMS process defined in ../main.nf.
name "Test Process GCTA_ADDGRMS"
4+
script "../main.nf"
5+
process "GCTA_ADDGRMS"
6+
7+
tag "modules"
8+
tag "modules_nfcore"
9+
tag "gcta"
10+
tag "gcta/addgrms"
11+
// gcta/makegrmpart is tagged as well because setup runs that module to build the input GRMs.
tag "gcta/makegrmpart"
12+
13+
// Fixtures: GCTA_MAKEGRMPART is run twice under two aliases, both on the same
// plink_simulated bed/bim/fam files, each with its own SNP list passed as input[1]
// (presumably an extract list — confirm in makegrmpart). The two literal 1s in input[0]
// are presumably part count and part index, since the tests later refer to the
// outputs as `part_1_1` — confirm against makegrmpart.
setup {
14+
// First fixture: SNP list is column 2 of the first 10 lines of plink_simulated.bim.
run("GCTA_MAKEGRMPART", alias: "GCTA_MAKEGRMPART_LDMS1") {
15+
script "../../makegrmpart/main.nf"
16+
process {
17+
"""
18+
file('plink_simulated_ldms1.mbfile').text = 'plink_simulated\\n'
19+
20+
def ldms1BimFile = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true)
21+
def ldms1ExtractSnps = ldms1BimFile.readLines()
22+
.take(10)
23+
.collect { row -> row.trim().split(/\\s+/)[1] }
24+
.join('\\n') + '\\n'
25+
file('plink_simulated_ldms1.snps.txt').text = ldms1ExtractSnps
26+
27+
input[0] = [
28+
[ id:'plink_simulated_ldms1' ],
29+
1,
30+
1,
31+
file('plink_simulated_ldms1.mbfile'),
32+
[
33+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true)
34+
],
35+
[
36+
ldms1BimFile
37+
],
38+
[
39+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true)
40+
]
41+
]
42+
input[1] = [[ id:'plink_simulated_ldms1' ], file('plink_simulated_ldms1.snps.txt')]
43+
"""
44+
}
45+
}
46+
47+
// Second fixture: identical apart from the names and the SNP list, which skips the
// first 10 lines of the .bim and takes column 2 of the next 10.
run("GCTA_MAKEGRMPART", alias: "GCTA_MAKEGRMPART_LDMS2") {
48+
script "../../makegrmpart/main.nf"
49+
process {
50+
"""
51+
file('plink_simulated_ldms2.mbfile').text = 'plink_simulated\\n'
52+
53+
def ldms2BimFile = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true)
54+
def ldms2ExtractSnps = ldms2BimFile.readLines()
55+
.drop(10)
56+
.take(10)
57+
.collect { row -> row.trim().split(/\\s+/)[1] }
58+
.join('\\n') + '\\n'
59+
file('plink_simulated_ldms2.snps.txt').text = ldms2ExtractSnps
60+
61+
input[0] = [
62+
[ id:'plink_simulated_ldms2' ],
63+
1,
64+
1,
65+
file('plink_simulated_ldms2.mbfile'),
66+
[
67+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true)
68+
],
69+
[
70+
ldms2BimFile
71+
],
72+
[
73+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true)
74+
]
75+
]
76+
input[1] = [[ id:'plink_simulated_ldms2' ], file('plink_simulated_ldms2.snps.txt')]
77+
"""
78+
}
79+
}
80+
}
81+
82+
// Real run. The input is assembled in three steps: a manifest file
// plink_simulated_ldms.mgrm naming the prefixes plink_simulated_ldms1.part_1_1 and
// plink_simulated_ldms2.part_1_1; the grm_files of both fixtures gathered into one
// list sorted by file name; and a final map that rebuilds each row as
// [meta, first element, remaining elements].
test("homo_sapiens popgen - merge dense GRMs from mgrm") {
83+
when {
84+
process {
85+
"""
86+
mgrm_file = Channel
87+
.of('plink_simulated_ldms1.part_1_1\\nplink_simulated_ldms2.part_1_1')
88+
.collectFile(name:'plink_simulated_ldms.mgrm', newLine: true)
89+
90+
grm_files = GCTA_MAKEGRMPART_LDMS1.out.grm_files
91+
.mix(GCTA_MAKEGRMPART_LDMS2.out.grm_files)
92+
.map { meta, grm_files, nparts_gcta, part_gcta_job -> grm_files }
93+
.collect()
94+
.map { bundles -> bundles.flatten().sort { it.name } }
95+
96+
input[0] = mgrm_file
97+
.combine(grm_files)
98+
.map { row -> [[ id:'plink_simulated_ldms' ], row[0], row[1..-1]] }
99+
"""
100+
}
101+
}
102+
103+
then {
104+
assertAll(
105+
{ assert process.success },
106+
// Exactly one combined_grm emission, carrying the id set in the input map.
{ assert process.out.combined_grm.size() == 1 },
107+
{ assert process.out.combined_grm.get(0).get(0).id == "plink_simulated_ldms" },
108+
{
109+
// Order-independent check of the three output file names (compared as sets).
assert process.out.combined_grm.get(0).get(1).collect { file(it).name }.toSet() == [
110+
'plink_simulated_ldms.grm.id',
111+
'plink_simulated_ldms.grm.bin',
112+
'plink_simulated_ldms.grm.N.bin'
113+
] as Set
114+
},
115+
{
116+
// Full output is compared against the stored snapshot after sanitizeOutput.
assert snapshot(sanitizeOutput(process.out)).match()
117+
}
118+
)
119+
}
120+
}
121+
122+
// Stub run: the same input construction as the non-stub test, executed with the
// "-stub" option. The explicit file-name set assertion is not repeated here; file
// names are only checked through the snapshot.
test("homo_sapiens popgen - merge dense GRMs from mgrm - stub") {
123+
options "-stub"
124+
125+
when {
126+
process {
127+
"""
128+
mgrm_file = Channel
129+
.of('plink_simulated_ldms1.part_1_1\\nplink_simulated_ldms2.part_1_1')
130+
.collectFile(name:'plink_simulated_ldms.mgrm', newLine: true)
131+
132+
grm_files = GCTA_MAKEGRMPART_LDMS1.out.grm_files
133+
.mix(GCTA_MAKEGRMPART_LDMS2.out.grm_files)
134+
.map { meta, grm_files, nparts_gcta, part_gcta_job -> grm_files }
135+
.collect()
136+
.map { bundles -> bundles.flatten().sort { it.name } }
137+
138+
input[0] = mgrm_file
139+
.combine(grm_files)
140+
.map { row -> [[ id:'plink_simulated_ldms' ], row[0], row[1..-1]] }
141+
"""
142+
}
143+
}
144+
145+
then {
146+
assertAll(
147+
{ assert process.success },
148+
// Exactly one combined_grm emission, carrying the id set in the input map.
{ assert process.out.combined_grm.size() == 1 },
149+
{ assert process.out.combined_grm.get(0).get(0).id == "plink_simulated_ldms" },
150+
{
151+
// Full output is compared against the stored snapshot after sanitizeOutput.
assert snapshot(sanitizeOutput(process.out)).match()
152+
}
153+
)
154+
}
155+
}
156+
}
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
{
2+
"homo_sapiens popgen - merge dense GRMs from mgrm - stub": {
3+
"content": [
4+
{
5+
"combined_grm": [
6+
[
7+
{
8+
"id": "plink_simulated_ldms"
9+
},
10+
[
11+
"plink_simulated_ldms.grm.N.bin:md5,d41d8cd98f00b204e9800998ecf8427e",
12+
"plink_simulated_ldms.grm.bin:md5,d41d8cd98f00b204e9800998ecf8427e",
13+
"plink_simulated_ldms.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e"
14+
]
15+
]
16+
],
17+
"versions_gcta": [
18+
[
19+
"GCTA_ADDGRMS",
20+
"gcta",
21+
"1.94.1"
22+
]
23+
]
24+
}
25+
],
26+
"meta": {
27+
"nf-test": "0.9.3",
28+
"nextflow": "25.10.4"
29+
},
30+
"timestamp": "2026-05-26T21:43:38.665922804"
31+
},
32+
"homo_sapiens popgen - merge dense GRMs from mgrm": {
33+
"content": [
34+
{
35+
"combined_grm": [
36+
[
37+
{
38+
"id": "plink_simulated_ldms"
39+
},
40+
[
41+
"plink_simulated_ldms.grm.N.bin:md5,804f8e1799c8b2d4d3df1b52a2a463c6",
42+
"plink_simulated_ldms.grm.bin:md5,850235911329bf9ab68f03e25bbc1ef1",
43+
"plink_simulated_ldms.grm.id:md5,4f9aa36c44a417ff6d7caa9841e66ad9"
44+
]
45+
]
46+
],
47+
"versions_gcta": [
48+
[
49+
"GCTA_ADDGRMS",
50+
"gcta",
51+
"1.94.1"
52+
]
53+
]
54+
}
55+
],
56+
"meta": {
57+
"nf-test": "0.9.3",
58+
"nextflow": "25.10.4"
59+
},
60+
"timestamp": "2026-05-26T21:43:29.377836352"
61+
}
62+
}

0 commit comments

Comments
 (0)