Skip to content

Commit 92203be

Browse files
committed
Add gcta/makebksparse module
1 parent 69017e7 commit 92203be

5 files changed

Lines changed: 326 additions & 0 deletions

File tree

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
---
2+
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
3+
channels:
4+
- conda-forge
5+
- bioconda
6+
dependencies:
7+
- bioconda::gcta=1.94.1
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
// Runs gcta --make-bK-sparse on a dense GRM and emits the resulting *_sp.grm.* files
// together with a meta map whose id is rewritten to the sparse GRM basename.
process GCTA_MAKEBKSPARSE {
2+
// Label each task with the incoming sample id.
tag "${meta.id}"
3+
label 'process_medium'
4+
// Conda environment is defined next to this module.
conda "${moduleDir}/environment.yml"
5+
// Use the https image when the container engine is singularity or apptainer and
// task.ext.singularity_pull_docker_container is not set; otherwise use the wave registry image.
container "${workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container
6+
? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/46/46b0d05f0daa47561d87d2a9cac5e51edc2c78e26f1bbab439c688386241a274/data'
7+
: 'community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9'}"
8+
9+
input:
10+
// meta.id is passed to --grm below, so the staged files are expected to be named after it.
tuple val(meta), path(grm_files)
11+
// Threshold forwarded unchanged to --make-bK-sparse.
val cutoff
12+
13+
output:
14+
// Every file matching *_sp.grm.*, paired with the meta map as reassigned in script/stub.
tuple val(meta), path("*_sp.grm.*"), emit: sparse_grm_files
15+
// Version tuple (process name, tool name, version parsed from gcta --version) sent to the versions topic.
tuple val("${task.process}"), val("gcta"), eval("gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'"), emit: versions_gcta, topic: versions
16+
17+
when:
18+
task.ext.when == null || task.ext.when
19+
20+
script:
21+
// Optional extra command-line flags supplied through task.ext.args.
def args = task.ext.args ?: ''
22+
// Capture the dense GRM basename BEFORE meta is reassigned two lines below.
def dense_prefix = meta.id
23+
// Output basename stem; defaults to the incoming meta.id unless task.ext.prefix is set.
def prefix = task.ext.prefix ?: "${meta.id}"
24+
// Build a new map with id replaced by the sparse basename; the emitted meta carries this id.
meta = meta + [id: "${prefix}_sp"]
25+
"""
26+
gcta \\
27+
--grm ${dense_prefix} \\
28+
--make-bK-sparse ${cutoff} \\
29+
--out ${prefix}_sp \\
30+
--thread-num ${task.cpus} \\
31+
${args}
32+
"""
33+
34+
stub:
35+
// Same prefix and meta rewrite as the script block, so stub outputs use the same names and id.
def prefix = task.ext.prefix ?: "${meta.id}"
36+
meta = meta + [id: "${prefix}_sp"]
37+
// Create empty placeholder .grm.id and .grm.sp files; gcta itself is not run here.
"""
38+
touch ${prefix}_sp.grm.id
39+
touch ${prefix}_sp.grm.sp
40+
"""
41+
}
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
2+
name: "gcta_makebksparse"
3+
description: Create a sparse GRM from a dense GRM for downstream fastGWA analyses
4+
keywords:
5+
- gcta
6+
- genome-wide complex trait analysis
7+
- grm
8+
- genetic relationship matrix
9+
- sparse
10+
- genetics
11+
tools:
12+
- "gcta":
13+
description: "Genome-wide Complex Trait Analysis (GCTA) estimates genetic relationships, variance components, and association statistics from genome-wide data."
14+
homepage: "https://yanglab.westlake.edu.cn/software/gcta/"
15+
documentation: "https://yanglab.westlake.edu.cn/software/gcta/static/gcta_doc_latest.pdf"
16+
tool_dev_url: "https://yanglab.westlake.edu.cn/software/gcta/"
17+
licence: ["GPL-3.0-only"]
18+
identifier: "biotools:gcta"
19+
20+
input:
21+
- - meta:
22+
type: map
23+
description: |
24+
Groovy map containing dense GRM metadata
25+
e.g. `[ id:'plink_simulated' ]`
26+
`meta.id` is required and must equal the dense GRM basename, because it is passed verbatim to `gcta --grm`.
27+
Input files must therefore be staged as `<meta.id>.grm.id`, `<meta.id>.grm.bin`, and `<meta.id>.grm.N.bin`.
28+
- grm_files:
29+
type: file
30+
description: Dense GRM sidecar files
31+
pattern: "*.grm.{id,bin,N.bin}"
32+
ontologies: []
33+
- cutoff:
34+
type: float
35+
description: Sparse GRM cutoff passed to `--make-bK-sparse`
36+
37+
output:
38+
sparse_grm_files:
39+
- - meta:
40+
type: map
41+
description: |
42+
Groovy map containing sparse GRM metadata
43+
e.g. `[ id:'plink_simulated_sp' ]`
44+
`meta.id` is set to the basename of the sparse GRM emitted by this module (`<prefix>_sp`) and can be passed downstream to `--grm-sparse`.
45+
- "*_sp.grm.*":
46+
type: file
47+
description: Sparse GRM sidecar files
48+
pattern: "*_sp.grm.{id,sp}"
49+
ontologies: []
50+
versions_gcta:
51+
- - "${task.process}":
52+
type: string
53+
description: The process the version was collected from
54+
- "gcta":
55+
type: string
56+
description: The tool name
57+
- "gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'":
58+
type: eval
59+
description: The command used to retrieve the GCTA version
60+
61+
topics:
62+
versions:
63+
- - ${task.process}:
64+
type: string
65+
description: The process the version was collected from
66+
- gcta:
67+
type: string
68+
description: The tool name
69+
- "gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'":
70+
type: eval
71+
description: The command used to retrieve the GCTA version
72+
73+
authors:
74+
- "@lyh970817"
75+
maintainers:
76+
- "@lyh970817"
Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
nextflow_process {
2+
3+
name "Test Process GCTA_MAKEBKSPARSE"
4+
script "../main.nf"
5+
process "GCTA_MAKEBKSPARSE"
6+
7+
tag "modules"
8+
tag "modules_nfcore"
9+
tag "gcta"
10+
tag "gcta/makebksparse"
11+
tag "gcta/makegrm"
12+
13+
// Fixtures: run the sibling GCTA_MAKEGRM module (../../makegrm/main.nf) twice under
// different aliases so the tests below can consume its grm_files output.
setup {
14+
// Fixture with meta id contract_dense, consumed by the two non-stub tests.
run("GCTA_MAKEGRM", alias: "GCTA_MAKEGRM_CONTRACT") {
15+
script "../../makegrm/main.nf"
16+
process {
17+
// Inputs: meta map, an mbfile whose only line is plink_simulated, and
// single-element lists holding the PLINK bed, bim and fam test files.
"""
18+
file('contract_dense.mbfile').text = 'plink_simulated\\n'
19+
20+
input[0] = [
21+
[ id:'contract_dense' ],
22+
file('contract_dense.mbfile'),
23+
[
24+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true)
25+
],
26+
[
27+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true)
28+
],
29+
[
30+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true)
31+
]
32+
]
33+
"""
34+
}
35+
}
36+
37+
// Fixture with meta id stub_dense, consumed by the stub test; same input layout as above.
run("GCTA_MAKEGRM", alias: "GCTA_MAKEGRM_STUB") {
38+
script "../../makegrm/main.nf"
39+
process {
40+
"""
41+
file('stub_dense.mbfile').text = 'plink_simulated\\n'
42+
43+
input[0] = [
44+
[ id:'stub_dense' ],
45+
file('stub_dense.mbfile'),
46+
[
47+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true)
48+
],
49+
[
50+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true)
51+
],
52+
[
53+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true)
54+
]
55+
]
56+
"""
57+
}
58+
}
59+
}
60+
61+
// Happy path: feed the CONTRACT fixture GRM and a cutoff of 0.05 into the process.
test("homo_sapiens popgen - create sparse GRM") {
62+
when {
63+
process {
64+
"""
65+
input[0] = GCTA_MAKEGRM_CONTRACT.out.grm_files
66+
input[1] = Channel.value(0.05)
67+
"""
68+
}
69+
}
70+
71+
then {
72+
// Expect success, exactly one output tuple whose meta id carries the _sp suffix,
// and stable snapshots for the sparse GRM files and the versions outputs.
assertAll(
73+
{ assert process.success },
74+
{ assert process.out.sparse_grm_files.size() == 1 },
75+
{ assert process.out.sparse_grm_files.get(0).get(0).id == "contract_dense_sp" },
76+
{ assert snapshot(process.out.sparse_grm_files).match("sparse_grm_files") },
77+
// Snapshot only the output channels whose name starts with versions.
{ assert snapshot(process.out.findAll { key, val -> key.startsWith('versions') }).match("versions") }
78+
)
79+
}
80+
}
81+
82+
// Negative case: keep the CONTRACT fixture files but swap the meta map for one with a
// different id. Per the test name, that id no longer matches the dense GRM basename.
test("homo_sapiens popgen - fail when meta id does not match dense GRM basename") {
83+
when {
84+
process {
85+
"""
86+
input[0] = GCTA_MAKEGRM_CONTRACT.out.grm_files.map { meta, grm_files ->
87+
[[ id:'contract_dense_mismatch' ], grm_files]
88+
}
89+
input[1] = Channel.value(0.05)
90+
"""
91+
}
92+
}
93+
94+
then {
95+
// The process must fail and report a non-zero exit status.
assertAll(
96+
{ assert !process.success },
97+
{ assert process.exitStatus != 0 }
98+
)
99+
}
100+
}
101+
102+
// Stub run: same shape as the happy-path test, but executed with the -stub option
// and fed from the STUB fixture.
test("homo_sapiens popgen - create sparse GRM - stub") {
103+
options "-stub"
104+
105+
when {
106+
process {
107+
"""
108+
input[0] = GCTA_MAKEGRM_STUB.out.grm_files
109+
input[1] = Channel.value(0.05)
110+
"""
111+
}
112+
}
113+
114+
then {
115+
// Expect success, one output tuple with meta id stub_dense_sp, and stable
// snapshots stored under the stub_ prefixed snapshot names.
assertAll(
116+
{ assert process.success },
117+
{ assert process.out.sparse_grm_files.size() == 1 },
118+
{ assert process.out.sparse_grm_files.get(0).get(0).id == "stub_dense_sp" },
119+
{ assert snapshot(process.out.sparse_grm_files).match("stub_sparse_grm_files") },
120+
{ assert snapshot(process.out.findAll { key, val -> key.startsWith('versions') }).match("stub_versions") }
121+
)
122+
}
123+
}
124+
}
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
{
2+
"stub_sparse_grm_files": {
3+
"content": [
4+
[
5+
[
6+
{
7+
"id": "stub_dense_sp"
8+
},
9+
[
10+
"stub_dense_sp.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e",
11+
"stub_dense_sp.grm.sp:md5,d41d8cd98f00b204e9800998ecf8427e"
12+
]
13+
]
14+
]
15+
],
16+
"meta": {
17+
"nf-test": "0.9.3",
18+
"nextflow": "25.10.4"
19+
},
20+
"timestamp": "2026-05-15T21:12:21.136559698"
21+
},
22+
"versions": {
23+
"content": [
24+
{
25+
"versions_gcta": [
26+
[
27+
"GCTA_MAKEBKSPARSE",
28+
"gcta",
29+
"1.94.1"
30+
]
31+
]
32+
}
33+
],
34+
"meta": {
35+
"nf-test": "0.9.3",
36+
"nextflow": "25.10.4"
37+
},
38+
"timestamp": "2026-03-21T00:30:38.045354436"
39+
},
40+
"sparse_grm_files": {
41+
"content": [
42+
[
43+
[
44+
{
45+
"id": "contract_dense_sp"
46+
},
47+
[
48+
"contract_dense_sp.grm.id:md5,4f9aa36c44a417ff6d7caa9841e66ad9",
49+
"contract_dense_sp.grm.sp:md5,1b78fe4b14c8690943d7687dd22ba85a"
50+
]
51+
]
52+
]
53+
],
54+
"meta": {
55+
"nf-test": "0.9.3",
56+
"nextflow": "25.10.4"
57+
},
58+
"timestamp": "2026-05-15T21:09:25.501833656"
59+
},
60+
"stub_versions": {
61+
"content": [
62+
{
63+
"versions_gcta": [
64+
[
65+
"GCTA_MAKEBKSPARSE",
66+
"gcta",
67+
"1.94.1"
68+
]
69+
]
70+
}
71+
],
72+
"meta": {
73+
"nf-test": "0.9.3",
74+
"nextflow": "25.10.4"
75+
},
76+
"timestamp": "2026-03-21T00:30:48.775770627"
77+
}
78+
}

0 commit comments

Comments
 (0)