Skip to content

Commit a53756e

Browse files
committed
Add gcta/filtergrmwithkeep module
1 parent 4b801f9 commit a53756e

17 files changed

Lines changed: 1138 additions & 0 deletions

File tree

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
---
2+
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
3+
channels:
4+
- conda-forge
5+
- bioconda
6+
dependencies:
7+
- bioconda::gcta=1.94.1
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
// Filters a dense GCTA GRM down to the individuals listed in a keep file by
// running `gcta --grm <meta.id> --keep <keep_file> --make-grm`.
// Inputs : [meta, grm_files] and [meta2, keep_file].
// Outputs: `filtered_grm` ([meta, files matching *_unrel.grm.*]) and `versions_gcta`.
process GCTA_FILTERGRMWITHKEEP {
2+
tag "${meta.id}"
3+
label 'process_medium'
4+
// Conda environment file shipped alongside this module.
conda "${moduleDir}/environment.yml"
5+
// Singularity runs use the https:// blob URL unless
// `task.ext.singularity_pull_docker_container` is set; every other case uses
// the community.wave.seqera.io image.
container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
6+
? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/46/46b0d05f0daa47561d87d2a9cac5e51edc2c78e26f1bbab439c688386241a274/data'
7+
: 'community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9'}"
8+
9+
input:
10+
// The script passes `meta.id` (not a file path) to `--grm`, so `grm_files`
// must be staged under the basename `<meta.id>.grm.*`.
tuple val(meta), path(grm_files)
11+
// `meta2` is accepted with the keep file but is not referenced elsewhere in this process.
tuple val(meta2), path(keep_file)
12+
13+
output:
14+
// Files matching `*_unrel.grm.*`, paired with the input `meta`.
tuple val(meta), path("*_unrel.grm.*"), emit: filtered_grm
15+
// (process name, tool name, version parsed from `gcta --version`); also sent to the `versions` topic.
tuple val("${task.process}"), val("gcta"), eval("gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'"), emit: versions_gcta, topic: versions
16+
17+
when:
18+
// Runs unless `task.ext.when` is set to a falsy value.
task.ext.when == null || task.ext.when
19+
20+
script:
21+
// Extra command-line arguments from `task.ext.args`; appended last to the gcta call.
def extra_args = task.ext.args ?: ''
22+
// Output basename (defaults to `meta.id`). It only affects `--out`; the
// `--grm` input basename is always `meta.id`.
def prefix = task.ext.prefix ?: "${meta.id}"
23+
24+
"""
25+
26+
gcta \\
27+
--grm ${meta.id} \\
28+
--keep ${keep_file} \\
29+
--make-grm \\
30+
--out ${prefix}_unrel \\
31+
--thread-num ${task.cpus} \\
32+
${extra_args}
33+
"""
34+
35+
stub:
36+
// Stub run: create empty placeholder files with the same names the output glob expects.
def prefix = task.ext.prefix ?: "${meta.id}"
37+
"""
38+
touch ${prefix}_unrel.grm.id
39+
touch ${prefix}_unrel.grm.bin
40+
touch ${prefix}_unrel.grm.N.bin
41+
"""
42+
}
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
2+
name: "gcta_filtergrmwithkeep"
3+
description: Filter a dense GRM to the individuals listed in a keep file
4+
keywords:
5+
- gcta
6+
- genome-wide complex trait analysis
7+
- grm
8+
- genetic relationship matrix
9+
- genetics
10+
tools:
11+
- "gcta":
12+
description: "Genome-wide Complex Trait Analysis (GCTA) estimates genetic relationships, variance components, and association statistics from genome-wide data."
13+
homepage: "https://yanglab.westlake.edu.cn/software/gcta/"
14+
documentation: "https://yanglab.westlake.edu.cn/software/gcta/static/gcta_doc_latest.pdf"
15+
tool_dev_url: "https://yanglab.westlake.edu.cn/software/gcta/"
16+
licence: ["GPL-3.0-only"]
17+
identifier: "biotools:gcta"
18+
19+
input:
20+
- - meta:
21+
type: map
22+
description: |
23+
Groovy map containing dense GRM metadata
24+
e.g. `[ id:'plink_simulated' ]`
25+
`meta.id` is required: it is passed unchanged to `gcta --grm` as the dense GRM basename.
26+
Input files must therefore be staged as `<meta.id>.grm.id`, `<meta.id>.grm.bin`, and `<meta.id>.grm.N.bin`.
27+
- grm_files:
28+
type: file
29+
description: Dense GRM file bundle
30+
pattern: "*.grm.*"
31+
ontologies: []
32+
- - meta2:
33+
type: map
34+
description: |
35+
Groovy map containing keep-file metadata
36+
e.g. `[ id:'plink_simulated_keep' ]`
37+
- keep_file:
38+
type: file
39+
description: Keep file listing the individuals to retain
40+
pattern: "*.{keep,txt,id}"
41+
ontologies:
42+
- edam: "http://edamontology.org/format_2330"
43+
44+
output:
45+
filtered_grm:
46+
- - meta:
47+
type: map
48+
description: |
49+
Groovy map containing dense GRM metadata
50+
e.g. `[ id:'plink_simulated' ]`
51+
`meta.id` is passed through unchanged from the input dense GRM tuple.
52+
- "*_unrel.grm.*":
53+
type: file
54+
description: Filtered GRM file bundle
55+
pattern: "*_unrel.grm.*"
56+
ontologies: []
57+
versions_gcta:
58+
- - "${task.process}":
59+
type: string
60+
description: The process the version was collected from
61+
- "gcta":
62+
type: string
63+
description: The tool name
64+
- "gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'":
65+
type: eval
66+
description: The command used to retrieve the GCTA version
67+
68+
topics:
69+
versions:
70+
- - ${task.process}:
71+
type: string
72+
description: The process the version was collected from
73+
- gcta:
74+
type: string
75+
description: The tool name
76+
- "gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'":
77+
type: eval
78+
description: The command used to retrieve the GCTA version
79+
80+
authors:
81+
- "@lyh970817"
82+
maintainers:
83+
- "@lyh970817"
Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
// nf-test suite for GCTA_FILTERGRMWITHKEEP: one real run, one expected-failure
// run and one stub run, all fed by the upstream processes started in `setup`.
nextflow_process {
2+
3+
name "Test Process GCTA_FILTERGRMWITHKEEP"
4+
script "../main.nf"
5+
process "GCTA_FILTERGRMWITHKEEP"
6+
7+
tag "modules"
8+
tag "modules_nfcore"
9+
tag "gcta"
10+
tag "gcta/filtergrmwithkeep"
11+
tag "gcta/makegrm"
12+
tag "gcta/removerelatedsubjects"
13+
14+
// Upstream fixtures shared by every test below.
setup {
15+
// Runs GCTA_MAKEGRM on the plink_simulated pgen/pvar/psam test data under the
// id `plink_simulated_dense`; its `grm_files` output is the GRM input used below.
run("GCTA_MAKEGRM", alias: "GCTA_MAKEGRM_DENSE") {
16+
script "../../makegrm/main.nf"
17+
process {
18+
"""
19+
file('plink_simulated.mpfile').text = 'plink_simulated plink_simulated.pgen plink_simulated.psam plink_simulated.pvar\\n'
20+
21+
input[0] = [
22+
[ id:'plink_simulated_dense' ],
23+
file('plink_simulated.mpfile'),
24+
[
25+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.pgen', checkIfExists: true)
26+
],
27+
[
28+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.pvar', checkIfExists: true)
29+
],
30+
[
31+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.psam', checkIfExists: true)
32+
]
33+
]
34+
"""
35+
}
36+
}
37+
38+
// Runs GCTA_REMOVERELATEDSUBJECTS on that GRM; its `keep_file` output is
// passed as `input[1]` in every test.
run("GCTA_REMOVERELATEDSUBJECTS", alias: "GCTA_REMOVERELATEDSUBJECTS_KEEP") {
39+
script "../../removerelatedsubjects/main.nf"
40+
process {
41+
"""
42+
dense_grm = GCTA_MAKEGRM_DENSE.out.grm_files
43+
.map { meta, grm_files -> [[ id:meta.id ], grm_files] }
44+
45+
input[0] = dense_grm
46+
"""
47+
}
48+
}
49+
}
50+
51+
// Real run: `meta.id` is kept from the upstream GRM, so the process is
// expected to succeed and emit the three `_unrel` GRM files.
test("homo_sapiens popgen - filter dense GRM with keep file") {
52+
config "./nextflow.config"
53+
54+
when {
55+
process {
56+
"""
57+
input[0] = GCTA_MAKEGRM_DENSE.out.grm_files.map { meta, grm_files -> [[ id:meta.id ], grm_files] }
58+
input[1] = GCTA_REMOVERELATEDSUBJECTS_KEEP.out.keep_file
59+
"""
60+
}
61+
}
62+
63+
then {
64+
assertAll(
65+
{ assert process.success },
66+
{ assert process.out.filtered_grm.size() == 1 },
67+
{ assert process.out.filtered_grm.get(0).get(0).id == "plink_simulated_dense" },
68+
{
69+
assert snapshot(
70+
process.out.filtered_grm
71+
).match("filtered_grm")
72+
},
73+
// Output files must exist and carry the expected names. The list is
// compared after sorting, where uppercase `N` sorts before `b` and `i`.
{
74+
def row = process.out.filtered_grm.get(0)
75+
assert row.get(1).every { file(it).exists() }
76+
assert row.get(1).collect { file(it).name }.sort() == [
77+
"plink_simulated_dense_unrel.grm.N.bin",
78+
"plink_simulated_dense_unrel.grm.bin",
79+
"plink_simulated_dense_unrel.grm.id"
80+
]
81+
},
82+
{ assert snapshot(process.out.findAll { key, val -> key.startsWith('versions') }).match("versions") }
83+
)
84+
}
85+
}
86+
87+
// Negative case: the GRM files keep their original names while `meta.id` is
// replaced with a different value, so the task is expected to fail.
test("homo_sapiens popgen - fail when meta id does not match dense GRM basename") {
88+
config "./nextflow.config"
89+
90+
when {
91+
process {
92+
"""
93+
input[0] = GCTA_MAKEGRM_DENSE.out.grm_files.map { meta, grm_files ->
94+
[[ id:'contract_dense_mismatch' ], grm_files]
95+
}
96+
input[1] = GCTA_REMOVERELATEDSUBJECTS_KEEP.out.keep_file
97+
"""
98+
}
99+
}
100+
101+
then {
102+
assertAll(
103+
{ assert !process.success },
104+
{ assert process.exitStatus != 0 }
105+
)
106+
}
107+
}
108+
109+
// Stub run (`-stub`): same inputs and assertions as the real run, checked
// against the separate `stub_*` snapshots.
test("homo_sapiens popgen - filter dense GRM with keep file - stub") {
110+
options "-stub"
111+
config "./nextflow.config"
112+
113+
when {
114+
process {
115+
"""
116+
input[0] = GCTA_MAKEGRM_DENSE.out.grm_files.map { meta, grm_files -> [[ id:meta.id ], grm_files] }
117+
input[1] = GCTA_REMOVERELATEDSUBJECTS_KEEP.out.keep_file
118+
"""
119+
}
120+
}
121+
122+
then {
123+
assertAll(
124+
{ assert process.success },
125+
{ assert process.out.filtered_grm.size() == 1 },
126+
{ assert process.out.filtered_grm.get(0).get(0).id == "plink_simulated_dense" },
127+
{
128+
assert snapshot(
129+
process.out.filtered_grm
130+
).match("stub_filtered_grm")
131+
},
132+
{
133+
def row = process.out.filtered_grm.get(0)
134+
assert row.get(1).every { file(it).exists() }
135+
assert row.get(1).collect { file(it).name }.sort() == [
136+
"plink_simulated_dense_unrel.grm.N.bin",
137+
"plink_simulated_dense_unrel.grm.bin",
138+
"plink_simulated_dense_unrel.grm.id"
139+
]
140+
},
141+
{ assert snapshot(process.out.findAll { key, val -> key.startsWith('versions') }).match("stub_versions") }
142+
)
143+
}
144+
}
145+
}
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
{
2+
"versions": {
3+
"content": [
4+
{
5+
"versions_gcta": [
6+
[
7+
"GCTA_FILTERGRMWITHKEEP",
8+
"gcta",
9+
"1.94.1"
10+
]
11+
]
12+
}
13+
],
14+
"meta": {
15+
"nf-test": "0.9.3",
16+
"nextflow": "25.10.4"
17+
},
18+
"timestamp": "2026-03-21T00:31:58.734112033"
19+
},
20+
"stub_filtered_grm": {
21+
"content": [
22+
[
23+
[
24+
{
25+
"id": "plink_simulated_dense"
26+
},
27+
[
28+
"plink_simulated_dense_unrel.grm.N.bin:md5,d41d8cd98f00b204e9800998ecf8427e",
29+
"plink_simulated_dense_unrel.grm.bin:md5,d41d8cd98f00b204e9800998ecf8427e",
30+
"plink_simulated_dense_unrel.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e"
31+
]
32+
]
33+
]
34+
],
35+
"meta": {
36+
"nf-test": "0.9.3",
37+
"nextflow": "25.10.4"
38+
},
39+
"timestamp": "2026-05-15T23:16:45.31350454"
40+
},
41+
"filtered_grm": {
42+
"content": [
43+
[
44+
[
45+
{
46+
"id": "plink_simulated_dense"
47+
},
48+
[
49+
"plink_simulated_dense_unrel.grm.N.bin:md5,06b73ea8bae8f1e5f5d4de33dbd2c75e",
50+
"plink_simulated_dense_unrel.grm.bin:md5,b1f124463eecbae86840a6651eec372d",
51+
"plink_simulated_dense_unrel.grm.id:md5,ca8c0bded6951fdd3bf0dddc97b6df6b"
52+
]
53+
]
54+
]
55+
],
56+
"meta": {
57+
"nf-test": "0.9.3",
58+
"nextflow": "25.10.4"
59+
},
60+
"timestamp": "2026-05-15T23:14:49.332143185"
61+
},
62+
"stub_versions": {
63+
"content": [
64+
{
65+
"versions_gcta": [
66+
[
67+
"GCTA_FILTERGRMWITHKEEP",
68+
"gcta",
69+
"1.94.1"
70+
]
71+
]
72+
}
73+
],
74+
"meta": {
75+
"nf-test": "0.9.3",
76+
"nextflow": "25.10.4"
77+
},
78+
"timestamp": "2026-03-21T00:32:15.282563791"
79+
}
80+
}
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
// Test-only parameters: the root location that the test-data paths in main.nf.test are appended to.
// Uses the NF_MODULES_TESTDATA_BASE_PATH environment variable when it is set and non-empty;
// otherwise falls back to the raw GitHub URL of the nf-core/test-datasets `modules` branch.
params {
2+
modules_testdata_base_path = System.getenv("NF_MODULES_TESTDATA_BASE_PATH") ?: "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/"
3+
}
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
---
2+
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
3+
channels:
4+
- conda-forge
5+
- bioconda
6+
dependencies:
7+
- bioconda::gcta=1.94.1

0 commit comments

Comments
 (0)