Skip to content

Commit 6ba256b

Browse files
committed
Add gcta/removerelatedsubjects module
1 parent 69017e7 commit 6ba256b

5 files changed

Lines changed: 375 additions & 0 deletions

File tree

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
---
2+
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
3+
channels:
4+
- conda-forge
5+
- bioconda
6+
dependencies:
7+
- bioconda::gcta=1.94.1
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
// Filters a dense GRM with `gcta --grm-cutoff 0.05 --make-grm`.
// Input:   [meta, grm_files]. The script passes `meta.id` (not a name derived from
//          the staged files) to `--grm`, so the staged files must use that basename.
// Outputs: the filtered GRM bundle, its `.grm.id` file on a separate channel, and a
//          version tuple published on the `versions` topic.
process GCTA_REMOVERELATEDSUBJECTS {
2+
tag "${meta.id}"
3+
label 'process_medium'
4+
conda "${moduleDir}/environment.yml"
5+
// Uses the https blob URL when the engine is singularity/apptainer and
// `task.ext.singularity_pull_docker_container` is not set; otherwise the
// community.wave.seqera.io image reference.
container "${workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container
6+
? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/46/46b0d05f0daa47561d87d2a9cac5e51edc2c78e26f1bbab439c688386241a274/data'
7+
: 'community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9'}"
8+
9+
input:
10+
// grm_files is only staged into the work directory; the script never references
// the variable directly and addresses the files through `meta.id` instead.
tuple val(meta), path(grm_files)
11+
12+
output:
13+
// The `*_unrel05.grm.*` glob also matches the `.grm.id` file, so that file is
// emitted on both `grm_files` and `keep_file`.
tuple val(meta), path("*_unrel05.grm.*"), emit: grm_files
14+
tuple val(meta), path("*_unrel05.grm.id"), emit: keep_file
15+
// Version tuple: process name, tool name, and the stdout of the eval'd command.
// The sed expression prints only the digits/dots that follow "* version v".
tuple val("${task.process}"), val("gcta"), eval("gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'"), emit: versions_gcta, topic: versions
16+
17+
when:
18+
// Runs unless `task.ext.when` is set to a falsy value.
task.ext.when == null || task.ext.when
19+
20+
script:
21+
// Extra command-line arguments, appended last to the gcta call.
def args = task.ext.args ?: ''
22+
// Output basename; defaults to `meta.id`. Only `--out` uses it: `--grm` always
// reads `meta.id`. The 0.05 cutoff and the `_unrel05` suffix are both hard-coded
// here and in the output globs above.
def prefix = task.ext.prefix ?: "${meta.id}"
23+
"""
24+
gcta \\
25+
--grm ${meta.id} \\
26+
--grm-cutoff 0.05 \\
27+
--make-grm \\
28+
--out ${prefix}_unrel05 \\
29+
--thread-num ${task.cpus} \\
30+
${args}
31+
"""
32+
33+
stub:
34+
// Creates empty placeholders for the three files matched by the output globs;
// gcta itself is not run for the outputs (the version eval above still calls it).
def prefix = task.ext.prefix ?: "${meta.id}"
35+
"""
36+
touch ${prefix}_unrel05.grm.id
37+
touch ${prefix}_unrel05.grm.bin
38+
touch ${prefix}_unrel05.grm.N.bin
39+
"""
40+
}
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
2+
name: "gcta_removerelatedsubjects"
3+
description: Remove related individuals from a dense GRM using `gcta --grm-cutoff`
4+
keywords:
5+
- gcta
6+
- genome-wide complex trait analysis
7+
- grm
8+
- genetic relationship matrix
9+
- genetics
10+
tools:
11+
- "gcta":
12+
description: "Genome-wide Complex Trait Analysis (GCTA) estimates genetic relationships, variance components, and association statistics from genome-wide data."
13+
homepage: "https://yanglab.westlake.edu.cn/software/gcta/"
14+
documentation: "https://yanglab.westlake.edu.cn/software/gcta/static/gcta_doc_latest.pdf"
15+
tool_dev_url: "https://yanglab.westlake.edu.cn/software/gcta/"
16+
licence: ["GPL-3.0-only"]
17+
identifier: "biotools:gcta"
18+
19+
input:
20+
- - meta:
21+
type: map
22+
description: |
23+
Groovy map containing dense GRM metadata.
24+
`meta.id` is the required GRM basename consumed by `--grm` and must match
25+
the staged dense GRM files.
26+
e.g. `[ id:'tiny_dense' ]` requires
27+
`tiny_dense.grm.id`, `tiny_dense.grm.bin`, and `tiny_dense.grm.N.bin`.
28+
- grm_files:
29+
type: file
30+
description: Dense GRM file bundle (`.grm.id`, `.grm.bin`, `.grm.N.bin`) whose shared basename equals `meta.id`
31+
pattern: "*.grm.*"
32+
ontologies: []
33+
34+
output:
35+
grm_files:
36+
- - meta:
37+
type: map
38+
description: |
39+
Groovy map containing dense GRM metadata.
40+
`meta.id` is passed through unchanged from the input, where it is the GRM basename given to `--grm`.
41+
- "*_unrel05.grm.*":
42+
type: file
43+
description: Relatedness-filtered GRM file bundle
44+
pattern: "*_unrel05.grm.*"
45+
ontologies: []
46+
keep_file:
47+
- - meta:
48+
type: map
49+
description: |
50+
Groovy map containing dense GRM metadata.
51+
`meta.id` is passed through unchanged from the input, where it is the GRM basename given to `--grm`.
52+
- "*_unrel05.grm.id":
53+
type: file
54+
description: Keep file of unrelated individuals emitted by GCTA
55+
pattern: "*_unrel05.grm.id"
56+
ontologies: []
57+
versions_gcta:
58+
- - "${task.process}":
59+
type: string
60+
description: The process the version was collected from
61+
- "gcta":
62+
type: string
63+
description: The tool name
64+
- "gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'":
65+
type: eval
66+
description: The command used to retrieve the GCTA version
67+
68+
topics:
69+
versions:
70+
- - ${task.process}:
71+
type: string
72+
description: The process the version was collected from
73+
- gcta:
74+
type: string
75+
description: The tool name
76+
- "gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'":
77+
type: eval
78+
description: The command used to retrieve the GCTA version
79+
80+
authors:
81+
- "@lyh970817"
82+
maintainers:
83+
- "@lyh970817"
Lines changed: 167 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,167 @@
1+
nextflow_process {
2+
3+
name "Test Process GCTA_REMOVERELATEDSUBJECTS"
4+
script "../main.nf"
5+
process "GCTA_REMOVERELATEDSUBJECTS"
6+
7+
tag "modules"
8+
tag "modules_nfcore"
9+
tag "gcta"
10+
tag "gcta/removerelatedsubjects"
11+
tag "gcta/makegrm"
12+
13+
// Builds the dense GRM fixtures consumed by the tests below by running the sibling
// GCTA_MAKEGRM module twice under different aliases. Both runs use the same
// plink_simulated bed/bim/fam test data and differ only in `meta.id`
// ('tiny_dense' vs 'stub_dense') and the name of the generated mbfile.
setup {
14+
// Fixture for the real-run and failure-path tests (id: 'tiny_dense').
run("GCTA_MAKEGRM", alias: "GCTA_MAKEGRM_CONTRACT") {
15+
script "../../makegrm/main.nf"
16+
// The snippet below writes a one-line mbfile containing 'plink_simulated' and
// passes it with the bed/bim/fam lists.
// NOTE(review): presumably the mbfile lists PLINK basenames for GCTA_MAKEGRM;
// confirm against the makegrm module.
process {
17+
"""
18+
file('tiny_dense.mbfile').text = 'plink_simulated\\n'
19+
20+
input[0] = [
21+
[ id:'tiny_dense' ],
22+
file('tiny_dense.mbfile'),
23+
[
24+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true)
25+
],
26+
[
27+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true)
28+
],
29+
[
30+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true)
31+
]
32+
]
33+
"""
34+
}
35+
}
36+
37+
// Fixture for the stub test (id: 'stub_dense'); otherwise identical to the run above.
run("GCTA_MAKEGRM", alias: "GCTA_MAKEGRM_STUB") {
38+
script "../../makegrm/main.nf"
39+
process {
40+
"""
41+
file('stub_dense.mbfile').text = 'plink_simulated\\n'
42+
43+
input[0] = [
44+
[ id:'stub_dense' ],
45+
file('stub_dense.mbfile'),
46+
[
47+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true)
48+
],
49+
[
50+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true)
51+
],
52+
[
53+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true)
54+
]
55+
]
56+
"""
57+
}
58+
}
59+
}
60+
61+
// Happy path: feeds the 'tiny_dense' GRM from setup straight into the process and
// checks channel sizes, meta pass-through, output file names, and the snapshot.
test("homo_sapiens popgen - remove related individuals from dense GRM") {
62+
when {
63+
process {
64+
"""
65+
input[0] = GCTA_MAKEGRM_CONTRACT.out.grm_files
66+
"""
67+
}
68+
}
69+
70+
then {
71+
assertAll(
72+
{ assert process.success },
73+
// Exactly one emission per output channel for the single input tuple.
{ assert process.out.grm_files.size() == 1 },
74+
{ assert process.out.keep_file.size() == 1 },
75+
// meta must come out unchanged.
{ assert process.out.grm_files.get(0).get(0).id == "tiny_dense" },
76+
// File-name contract: outputs are `<meta.id>_unrel05.grm.{N.bin,bin,id}`.
// The expected list is in sorted order: uppercase 'N' sorts before lowercase 'b' and 'i'.
{
77+
def grm_row = process.out.grm_files.get(0)
78+
def expected_prefix = "${grm_row.get(0).id}_unrel05"
79+
assert grm_row.get(1).collect { file(it).name }.sort() == [
80+
"${expected_prefix}.grm.N.bin",
81+
"${expected_prefix}.grm.bin",
82+
"${expected_prefix}.grm.id"
83+
]
84+
assert file(process.out.keep_file.get(0).get(1)).name == "${expected_prefix}.grm.id"
85+
},
86+
// Snapshot both file channels plus every output whose name starts with 'versions'.
{
87+
assert snapshot(
88+
process.out.grm_files,
89+
process.out.keep_file,
90+
process.out.findAll { key, val -> key.startsWith('versions') }
91+
).match()
92+
}
93+
)
94+
}
95+
}
96+
97+
// Negative test for the basename contract: the staged files keep the 'tiny_dense'
// basename but meta.id is replaced with 'tiny_dense_mismatched'. The process passes
// meta.id to `--grm`, so the run is expected to fail with a non-zero exit status.
test("homo_sapiens popgen - remove related individuals fails when meta.id is not GRM basename") {
98+
when {
99+
process {
100+
"""
101+
input[0] = GCTA_MAKEGRM_CONTRACT.out.grm_files.map { meta, grm_files ->
102+
[[ id:'tiny_dense_mismatched' ], grm_files]
103+
}
104+
"""
105+
}
106+
}
107+
108+
then {
109+
assertAll(
110+
{ assert !process.success },
111+
{ assert process.exitStatus != 0 }
112+
)
113+
}
114+
}
115+
116+
// Negative test for input shape: the mapped channel emits only `[meta]`, dropping
// the file element that the process's `tuple val(meta), path(grm_files)` input
// declares. Only overall failure is asserted; the exit status is not checked here.
test("homo_sapiens popgen - remove related individuals fails for malformed GRM tuple") {
117+
when {
118+
process {
119+
"""
120+
input[0] = GCTA_MAKEGRM_CONTRACT.out.grm_files.map { meta, grm_files ->
121+
[[ id:meta.id ]]
122+
}
123+
"""
124+
}
125+
}
126+
127+
then {
128+
assert !process.success
129+
}
130+
}
131+
132+
// Stub-mode counterpart of the happy-path test, fed by the 'stub_dense' fixture.
// Checks the same meta pass-through and file-name contract, then snapshots the outputs.
test("homo_sapiens popgen - remove related individuals from dense GRM - stub") {
133+
options "-stub"
134+
135+
when {
136+
process {
137+
"""
138+
input[0] = GCTA_MAKEGRM_STUB.out.grm_files
139+
"""
140+
}
141+
}
142+
143+
then {
144+
assertAll(
145+
{ assert process.success },
146+
// meta must come out unchanged.
{ assert process.out.grm_files.get(0).get(0).id == "stub_dense" },
147+
// Same file-name contract as the real run: `<meta.id>_unrel05.grm.{N.bin,bin,id}`.
{
148+
def grm_row = process.out.grm_files.get(0)
149+
def expected_prefix = "${grm_row.get(0).id}_unrel05"
150+
assert grm_row.get(1).collect { file(it).name }.sort() == [
151+
"${expected_prefix}.grm.N.bin",
152+
"${expected_prefix}.grm.bin",
153+
"${expected_prefix}.grm.id"
154+
]
155+
assert file(process.out.keep_file.get(0).get(1)).name == "${expected_prefix}.grm.id"
156+
},
157+
// Snapshot both file channels plus every output whose name starts with 'versions'.
{
158+
assert snapshot(
159+
process.out.grm_files,
160+
process.out.keep_file,
161+
process.out.findAll { key, val -> key.startsWith('versions') }
162+
).match()
163+
}
164+
)
165+
}
166+
}
167+
}
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
{
2+
"homo_sapiens popgen - remove related individuals from dense GRM": {
3+
"content": [
4+
[
5+
[
6+
{
7+
"id": "tiny_dense"
8+
},
9+
[
10+
"tiny_dense_unrel05.grm.N.bin:md5,06b73ea8bae8f1e5f5d4de33dbd2c75e",
11+
"tiny_dense_unrel05.grm.bin:md5,b1f124463eecbae86840a6651eec372d",
12+
"tiny_dense_unrel05.grm.id:md5,ca8c0bded6951fdd3bf0dddc97b6df6b"
13+
]
14+
]
15+
],
16+
[
17+
[
18+
{
19+
"id": "tiny_dense"
20+
},
21+
"tiny_dense_unrel05.grm.id:md5,ca8c0bded6951fdd3bf0dddc97b6df6b"
22+
]
23+
],
24+
{
25+
"versions_gcta": [
26+
[
27+
"GCTA_REMOVERELATEDSUBJECTS",
28+
"gcta",
29+
"1.94.1"
30+
]
31+
]
32+
}
33+
],
34+
"meta": {
35+
"nf-test": "0.9.3",
36+
"nextflow": "25.10.4"
37+
},
38+
"timestamp": "2026-05-15T22:50:50.597885689"
39+
},
40+
"homo_sapiens popgen - remove related individuals from dense GRM - stub": {
41+
"content": [
42+
[
43+
[
44+
{
45+
"id": "stub_dense"
46+
},
47+
[
48+
"stub_dense_unrel05.grm.N.bin:md5,d41d8cd98f00b204e9800998ecf8427e",
49+
"stub_dense_unrel05.grm.bin:md5,d41d8cd98f00b204e9800998ecf8427e",
50+
"stub_dense_unrel05.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e"
51+
]
52+
]
53+
],
54+
[
55+
[
56+
{
57+
"id": "stub_dense"
58+
},
59+
"stub_dense_unrel05.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e"
60+
]
61+
],
62+
{
63+
"versions_gcta": [
64+
[
65+
"GCTA_REMOVERELATEDSUBJECTS",
66+
"gcta",
67+
"1.94.1"
68+
]
69+
]
70+
}
71+
],
72+
"meta": {
73+
"nf-test": "0.9.3",
74+
"nextflow": "25.10.4"
75+
},
76+
"timestamp": "2026-05-15T22:55:53.542534458"
77+
}
78+
}

0 commit comments

Comments
 (0)