Skip to content

Commit 5962c65

Browse files
committed
Add gcta/addgrms module
1 parent 4b801f9 commit 5962c65

5 files changed

Lines changed: 349 additions & 0 deletions

File tree

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
---
2+
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
3+
# Channels are listed with conda-forge ahead of bioconda.
channels:
4+
- conda-forge
5+
- bioconda
6+
# Single dependency: GCTA pinned to an exact version and to the bioconda channel.
dependencies:
7+
- bioconda::gcta=1.94.1
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
// Runs `gcta --mgrm <manifest> --make-grm --out <prefix>` and emits the resulting *.grm.* files
// together with the GCTA version.
process GCTA_ADDGRMS {
2+
tag "${meta.id}"
3+
label 'process_medium'
4+
conda "${moduleDir}/environment.yml"
5+
// Singularity uses the HTTPS blob URL unless task.ext.singularity_pull_docker_container is set;
// in every other case the community.wave.seqera.io image reference is used.
container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
6+
? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/46/46b0d05f0daa47561d87d2a9cac5e51edc2c78e26f1bbab439c688386241a274/data'
7+
: 'community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9'}"
8+
9+
input:
10+
// grm_files is never named on the command line below; it is declared so the files are staged
// into the task directory. Presumably the manifest refers to them by prefix - confirm against the GCTA docs.
tuple val(meta), path(mgrm_file), path(grm_files)
11+
12+
output:
13+
// NOTE: the glob is not tied to `prefix`.
tuple val(meta), path("*.grm.*"), emit: combined_grm
14+
// Version tuple is also published on the `versions` topic. The sed expression prints only the
// number that follows "* version v" at the start of a line of `gcta --version` output.
tuple val("${task.process}"), val("gcta"), eval("gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'"), emit: versions_gcta, topic: versions
15+
16+
when:
17+
// Runs unless task.ext.when is set to a falsy value.
task.ext.when == null || task.ext.when
18+
19+
script:
20+
// task.ext.args is appended verbatim after the fixed options (empty string when unset).
def extra_args = task.ext.args ?: ''
21+
// Output name passed to --out; defaults to meta.id when task.ext.prefix is unset.
def prefix = task.ext.prefix ?: "${meta.id}"
22+
23+
"""
24+
25+
gcta \\
26+
--mgrm ${mgrm_file} \\
27+
--make-grm \\
28+
--out ${prefix} \\
29+
--thread-num ${task.cpus} \\
30+
${extra_args}
31+
"""
32+
33+
stub:
34+
// Stub touches one placeholder per GRM suffix (.grm.id, .grm.bin, .grm.N.bin) without running gcta.
def prefix = task.ext.prefix ?: "${meta.id}"
35+
"""
36+
touch ${prefix}.grm.id
37+
touch ${prefix}.grm.bin
38+
touch ${prefix}.grm.N.bin
39+
"""
40+
}
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
2+
name: "gcta_addgrms"
3+
description: Combine multiple GRMs listed in an MGRM manifest into a single dense GRM
4+
keywords:
5+
- gcta
6+
- genome-wide complex trait analysis
7+
- grm
8+
- genetic relationship matrix
9+
- genetics
10+
tools:
11+
- "gcta":
12+
description: "Genome-wide Complex Trait Analysis (GCTA) estimates genetic relationships, variance components, and association statistics from genome-wide data."
13+
homepage: "https://yanglab.westlake.edu.cn/software/gcta/"
14+
documentation: "https://yanglab.westlake.edu.cn/software/gcta/static/gcta_doc_latest.pdf"
15+
tool_dev_url: "https://yanglab.westlake.edu.cn/software/gcta/"
16+
licence: ["GPL-3.0-only"]
17+
identifier: "biotools:gcta"
18+
19+
# One input channel whose tuple holds three elements: meta, mgrm_file, grm_files.
input:
20+
- - meta:
21+
type: map
22+
description: |
23+
Groovy map containing combined GRM metadata
24+
e.g. `[ id:'plink_simulated' ]`
25+
- mgrm_file:
26+
type: file
27+
description: MGRM manifest listing the GRM prefixes to combine
28+
pattern: "*.mgrm"
29+
ontologies:
30+
- edam: "http://edamontology.org/format_2330"
31+
- grm_files:
32+
type: file
33+
description: GRM sidecar files referenced by `mgrm_file`
34+
# Catch-all pattern; no ontology terms are listed for these files.
pattern: "*"
35+
ontologies: []
36+
37+
output:
38+
combined_grm:
39+
- - meta:
40+
type: map
41+
description: |
42+
Groovy map containing combined GRM metadata
43+
e.g. `[ id:'plink_simulated' ]`
44+
# The key is the broad glob "*.grm.*"; `pattern` below narrows it to three named suffixes.
- "*.grm.*":
45+
type: file
46+
description: Combined dense GRM sidecar files
47+
pattern: "*.grm.{id,bin,N.bin}"
48+
ontologies: []
49+
versions_gcta:
50+
- - "${task.process}":
51+
type: string
52+
description: The process the version was collected from
53+
- "gcta":
54+
type: string
55+
description: The tool name
56+
# This command string is repeated under `topics.versions` below; keep the two copies identical.
# NOTE(review): presumably it must also match the eval() string in the module's main.nf - confirm.
- "gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'":
57+
type: eval
58+
description: The command used to retrieve the GCTA version
59+
60+
# Repeats the three-element versions tuple for the `versions` topic.
topics:
61+
versions:
62+
- - ${task.process}:
63+
type: string
64+
description: The process the version was collected from
65+
- gcta:
66+
type: string
67+
description: The tool name
68+
- "gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'":
69+
type: eval
70+
description: The command used to retrieve the GCTA version
71+
72+
authors:
73+
- "@lyh970817"
74+
maintainers:
75+
- "@lyh970817"
Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
1+
nextflow_process {
2+
3+
name "Test Process GCTA_ADDGRMS"
4+
script "../main.nf"
5+
process "GCTA_ADDGRMS"
6+
7+
tag "modules"
8+
tag "modules_nfcore"
9+
tag "gcta"
10+
tag "gcta/addgrms"
11+
tag "gcta/makegrmpart"
12+
13+
// Runs GCTA_MAKEGRMPART twice under different aliases so the tests below have two sets of
// grm_files to merge. Both runs read the same plink_simulated bed/bim/fam test data and differ
// only in the SNP list written to the *.snps.txt file.
setup {
14+
// First run: SNP list is the second whitespace-separated field of the first 10 .bim rows.
// NOTE(review): the two literal `1` inputs are presumably part count and part index, and
// input[1] is presumably an SNP extract list - confirm against makegrmpart's input block.
run("GCTA_MAKEGRMPART", alias: "GCTA_MAKEGRMPART_LDMS1") {
15+
script "../../makegrmpart/main.nf"
16+
process {
17+
"""
18+
file('plink_simulated_ldms1.mbfile').text = 'plink_simulated\\n'
19+
20+
def ldms1BimFile = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true)
21+
def ldms1ExtractSnps = ldms1BimFile.readLines()
22+
.take(10)
23+
.collect { row -> row.trim().split(/\\s+/)[1] }
24+
.join('\\n') + '\\n'
25+
file('plink_simulated_ldms1.snps.txt').text = ldms1ExtractSnps
26+
27+
input[0] = [
28+
[ id:'plink_simulated_ldms1' ],
29+
1,
30+
1,
31+
file('plink_simulated_ldms1.mbfile'),
32+
[
33+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true)
34+
],
35+
[
36+
ldms1BimFile
37+
],
38+
[
39+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true)
40+
]
41+
]
42+
input[1] = [[ id:'plink_simulated_ldms1' ], file('plink_simulated_ldms1.snps.txt')]
43+
"""
44+
}
45+
}
46+
47+
// Second run: same inputs, but the SNP list skips the first 10 .bim rows and takes the next 10,
// so the two runs use non-overlapping rows of the .bim file.
run("GCTA_MAKEGRMPART", alias: "GCTA_MAKEGRMPART_LDMS2") {
48+
script "../../makegrmpart/main.nf"
49+
process {
50+
"""
51+
file('plink_simulated_ldms2.mbfile').text = 'plink_simulated\\n'
52+
53+
def ldms2BimFile = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true)
54+
def ldms2ExtractSnps = ldms2BimFile.readLines()
55+
.drop(10)
56+
.take(10)
57+
.collect { row -> row.trim().split(/\\s+/)[1] }
58+
.join('\\n') + '\\n'
59+
file('plink_simulated_ldms2.snps.txt').text = ldms2ExtractSnps
60+
61+
input[0] = [
62+
[ id:'plink_simulated_ldms2' ],
63+
1,
64+
1,
65+
file('plink_simulated_ldms2.mbfile'),
66+
[
67+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true)
68+
],
69+
[
70+
ldms2BimFile
71+
],
72+
[
73+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true)
74+
]
75+
]
76+
input[1] = [[ id:'plink_simulated_ldms2' ], file('plink_simulated_ldms2.snps.txt')]
77+
"""
78+
}
79+
}
80+
}
81+
82+
// Real (non-stub) run: merges the grm_files of both setup runs through a two-line manifest.
test("homo_sapiens popgen - merge dense GRMs from mgrm") {
83+
config "./nextflow.config"
84+
85+
// Input construction (inside the string below):
//  - the manifest is written with collectFile and lists the two setup prefixes, one per line;
//  - grm_files from both setup runs are mixed, stripped to the file element, collected and
//    flattened into a single list sorted by file name;
//  - combine + map yields one [meta, manifest, [grm files...]] tuple with id 'plink_simulated_ldms'.
when {
86+
process {
87+
"""
88+
mgrm_file = Channel
89+
.of('plink_simulated_ldms1.part_1_1\\nplink_simulated_ldms2.part_1_1')
90+
.collectFile(name:'plink_simulated_ldms.mgrm', newLine: true)
91+
92+
grm_files = GCTA_MAKEGRMPART_LDMS1.out.grm_files
93+
.mix(GCTA_MAKEGRMPART_LDMS2.out.grm_files)
94+
.map { meta, grm_files, nparts_gcta, part_gcta_job -> grm_files }
95+
.collect()
96+
.map { bundles -> bundles.flatten().sort { it.name } }
97+
98+
input[0] = mgrm_file
99+
.combine(grm_files)
100+
.map { row -> [[ id:'plink_simulated_ldms' ], row[0], row[1..-1]] }
101+
"""
102+
}
103+
}
104+
105+
then {
106+
assertAll(
107+
{ assert process.success },
108+
// Exactly one combined_grm emission, carrying the merged meta id.
{ assert process.out.combined_grm.size() == 1 },
109+
{ assert process.out.combined_grm.get(0).get(0).id == "plink_simulated_ldms" },
110+
// The emitted file names must be exactly the three merged outputs, compared as a set
// so ordering does not matter.
{
111+
assert process.out.combined_grm.get(0).get(1).collect { file(it).name }.toSet() == [
112+
'plink_simulated_ldms.grm.id',
113+
'plink_simulated_ldms.grm.bin',
114+
'plink_simulated_ldms.grm.N.bin'
115+
] as Set
116+
},
117+
// Snapshot covers combined_grm plus every output whose name starts with 'versions'.
{
118+
assert snapshot(
119+
process.out.combined_grm,
120+
process.out.findAll { key, val -> key.startsWith('versions') }
121+
).match()
122+
}
123+
)
124+
}
125+
}
126+
127+
// Stub variant of the merge test: same input construction, executed with the `-stub` option.
test("homo_sapiens popgen - merge dense GRMs from mgrm - stub") {
128+
options "-stub"
129+
config "./nextflow.config"
130+
131+
// Input construction (inside the string below) is the same as in the non-stub test:
// a two-line manifest plus one flat, name-sorted list of grm_files from both setup runs.
when {
132+
process {
133+
"""
134+
mgrm_file = Channel
135+
.of('plink_simulated_ldms1.part_1_1\\nplink_simulated_ldms2.part_1_1')
136+
.collectFile(name:'plink_simulated_ldms.mgrm', newLine: true)
137+
138+
grm_files = GCTA_MAKEGRMPART_LDMS1.out.grm_files
139+
.mix(GCTA_MAKEGRMPART_LDMS2.out.grm_files)
140+
.map { meta, grm_files, nparts_gcta, part_gcta_job -> grm_files }
141+
.collect()
142+
.map { bundles -> bundles.flatten().sort { it.name } }
143+
144+
input[0] = mgrm_file
145+
.combine(grm_files)
146+
.map { row -> [[ id:'plink_simulated_ldms' ], row[0], row[1..-1]] }
147+
"""
148+
}
149+
}
150+
151+
then {
152+
assertAll(
153+
{ assert process.success },
154+
// Exactly one combined_grm emission, carrying the merged meta id.
{ assert process.out.combined_grm.size() == 1 },
155+
{ assert process.out.combined_grm.get(0).get(0).id == "plink_simulated_ldms" },
156+
// Snapshot covers combined_grm plus every output whose name starts with 'versions'.
{
157+
assert snapshot(
158+
process.out.combined_grm,
159+
process.out.findAll { key, val -> key.startsWith('versions') }
160+
).match()
161+
}
162+
)
163+
}
164+
}
165+
}
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
{
2+
"homo_sapiens popgen - merge dense GRMs from mgrm - stub": {
3+
"content": [
4+
[
5+
[
6+
{
7+
"id": "plink_simulated_ldms"
8+
},
9+
[
10+
"plink_simulated_ldms.grm.N.bin:md5,d41d8cd98f00b204e9800998ecf8427e",
11+
"plink_simulated_ldms.grm.bin:md5,d41d8cd98f00b204e9800998ecf8427e",
12+
"plink_simulated_ldms.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e"
13+
]
14+
]
15+
],
16+
{
17+
"versions_gcta": [
18+
[
19+
"GCTA_ADDGRMS",
20+
"gcta",
21+
"1.94.1"
22+
]
23+
]
24+
}
25+
],
26+
"meta": {
27+
"nf-test": "0.9.3",
28+
"nextflow": "25.10.4"
29+
},
30+
"timestamp": "2026-05-15T21:10:46.231316108"
31+
},
32+
"homo_sapiens popgen - merge dense GRMs from mgrm": {
33+
"content": [
34+
[
35+
[
36+
{
37+
"id": "plink_simulated_ldms"
38+
},
39+
[
40+
"plink_simulated_ldms.grm.N.bin:md5,804f8e1799c8b2d4d3df1b52a2a463c6",
41+
"plink_simulated_ldms.grm.bin:md5,850235911329bf9ab68f03e25bbc1ef1",
42+
"plink_simulated_ldms.grm.id:md5,4f9aa36c44a417ff6d7caa9841e66ad9"
43+
]
44+
]
45+
],
46+
{
47+
"versions_gcta": [
48+
[
49+
"GCTA_ADDGRMS",
50+
"gcta",
51+
"1.94.1"
52+
]
53+
]
54+
}
55+
],
56+
"meta": {
57+
"nf-test": "0.9.3",
58+
"nextflow": "25.10.4"
59+
},
60+
"timestamp": "2026-05-15T22:52:43.953267272"
61+
}
62+
}

0 commit comments

Comments
 (0)