Skip to content

Commit 7995361

Browse files
committed
Normalize gcta/remlldms LDMS setup
1 parent 2fe03f8 commit 7995361

9 files changed

Lines changed: 336 additions & 144 deletions

File tree

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,25 @@
11
process GCTA_MAKEGRMPART {
2-
tag "part ${meta.part_gcta_job} of ${meta.nparts_gcta} (${meta.id})"
2+
tag "${meta.id}: part ${part_gcta_job} of ${nparts_gcta}"
33
label 'process_medium'
44
conda "${moduleDir}/environment.yml"
5-
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
6-
'docker://community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' :
7-
'community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' }"
5+
container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
6+
? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/46/46b0d05f0daa47561d87d2a9cac5e51edc2c78e26f1bbab439c688386241a274/data'
7+
: 'community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9'}"
88

99
input:
10-
tuple val(meta), path(mfile), path(bed_pgen), path(bim_pvar), path(fam_psam)
10+
tuple val(meta), val(nparts_gcta), val(part_gcta_job), path(mfile), path(bed_pgen), path(bim_pvar), path(fam_psam)
1111
tuple val(meta2), path(snp_group_file)
1212

1313
output:
14-
tuple val(meta), path("*.part_${meta.nparts_gcta}_${meta.part_gcta_job}.grm.id"), path("*.part_${meta.nparts_gcta}_${meta.part_gcta_job}.grm.bin"), path("*.part_${meta.nparts_gcta}_${meta.part_gcta_job}.grm.N.bin"), emit: grm_files
15-
tuple val("${task.process}"), val("gcta"), eval("gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//'"), emit: versions_gcta, topic: versions
14+
tuple val(meta), path("*.part_${nparts}_${part}.grm.*"), val(nparts_gcta), val(part_gcta_job), emit: grm_files
15+
tuple val("${task.process}"), val("gcta"), eval("gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'"), emit: versions_gcta, topic: versions
1616

1717
when:
1818
task.ext.when == null || task.ext.when
1919

2020
script:
21-
def part_gcta_job = meta.part_gcta_job
22-
def nparts_gcta = meta.nparts_gcta
21+
nparts = nparts_gcta ?: 1
22+
part = part_gcta_job ?: 1
2323
def extract_cmd = snp_group_file ? "--extract ${snp_group_file}" : ''
2424
def extra_args = task.ext.args ?: ''
2525
def prefix = task.ext.prefix ?: "${meta.id}"
@@ -28,21 +28,21 @@ process GCTA_MAKEGRMPART {
2828
def multi_file_flag = genotype_extension == 'pgen' ? '--mpfile' : '--mbfile'
2929

3030
"""
31-
3231
gcta \\
3332
${multi_file_flag} ${mfile} \\
34-
--make-grm-part ${nparts_gcta} ${part_gcta_job} \\
33+
--make-grm-part ${nparts} ${part} \\
3534
${extract_cmd} \\
36-
--maf 0.01 \\
3735
--thread-num ${task.cpus} \\
3836
--out ${prefix} ${extra_args}
3937
"""
4038

4139
stub:
40+
nparts = nparts_gcta ?: 1
41+
part = part_gcta_job ?: 1
4242
def prefix = task.ext.prefix ?: "${meta.id}"
4343
"""
44-
touch ${prefix}.part_${meta.nparts_gcta}_${meta.part_gcta_job}.grm.id
45-
touch ${prefix}.part_${meta.nparts_gcta}_${meta.part_gcta_job}.grm.bin
46-
touch ${prefix}.part_${meta.nparts_gcta}_${meta.part_gcta_job}.grm.N.bin
44+
touch ${prefix}.part_${nparts}_${part}.grm.id
45+
touch ${prefix}.part_${nparts}_${part}.grm.bin
46+
touch ${prefix}.part_${nparts}_${part}.grm.N.bin
4747
"""
4848
}

modules/nf-core/gcta/makegrmpart/meta.yml

Lines changed: 48 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -3,51 +3,70 @@ name: "gcta_makegrmpart"
33
description: Compute one partition of a GCTA genetic relationship matrix
44
keywords:
55
- gcta
6+
- genome-wide complex trait analysis
67
- grm
8+
- genetic relationship matrix
79
- genetics
810
tools:
911
- "gcta":
1012
description: "GCTA is a tool for genome-wide complex trait analysis."
1113
homepage: "https://yanglab.westlake.edu.cn/software/gcta/"
1214
documentation: "https://yanglab.westlake.edu.cn/software/gcta/static/gcta_doc_latest.pdf"
1315
tool_dev_url: "https://github.com/jianyangqt/gcta"
14-
licence: ["GPL-3.0-only"]
16+
licence:
17+
- "GPL-3.0-only"
18+
identifier: biotools:gcta
1519
input:
1620
- - meta:
1721
type: map
1822
description: |
19-
Groovy Map containing GRM-partition job metadata
20-
e.g. `[ id:'gcta_grm', part_gcta_job:1, nparts_gcta:2 ]`
23+
Groovy Map containing GRM-partition sample metadata
24+
e.g. `[ id:'gcta_grm' ]`
25+
- nparts_gcta:
26+
type: integer
27+
description: Total number of GRM partitions requested via
28+
`--make-grm-part`; defaults to `1` when `null`
29+
default: 1
30+
- part_gcta_job:
31+
type: integer
32+
description: One-based index of the GRM partition to compute via
33+
`--make-grm-part`; defaults to `1` when `null`
34+
default: 1
2135
- mfile:
2236
type: file
23-
description: GCTA multi-input manifest consumed by `--mbfile` or `--mpfile`
37+
description: GCTA multi-input manifest consumed by `--mbfile` or
38+
`--mpfile`
2439
pattern: "*.{mbfile,mpfile,txt}"
2540
ontologies:
2641
- edam: "http://edamontology.org/format_2330"
2742
- bed_pgen:
2843
type: file
29-
description: Collection of PLINK primary genotype files referenced by the multi-input manifest
44+
description: Collection of PLINK primary genotype files referenced by the
45+
multi-input manifest
3046
pattern: "*.{bed,pgen}"
3147
ontologies:
3248
- edam: "http://edamontology.org/format_3003"
3349
- bim_pvar:
3450
type: file
35-
description: Collection of PLINK variant metadata files referenced by the multi-input manifest
51+
description: Collection of PLINK variant metadata files referenced by the
52+
multi-input manifest
3653
pattern: "*.{bim,pvar}"
3754
ontologies: []
3855
- fam_psam:
3956
type: file
40-
description: Collection of PLINK sample metadata files referenced by the multi-input manifest
57+
description: Collection of PLINK sample metadata files referenced by the
58+
multi-input manifest
4159
pattern: "*.{fam,psam}"
4260
ontologies: []
4361
- - meta2:
4462
type: map
4563
description: |
4664
Groovy Map containing SNP-selection metadata
47-
e.g. `[ id:'snp_group1', snp_group:1 ]`
65+
e.g. `[ id:'snp_group1' ]`
4866
- snp_group_file:
4967
type: file
50-
description: Optional SNP extraction file passed to `--extract`; provide `[]` when absent
68+
description: Optional SNP extraction file passed to `--extract`; provide
69+
`[]` when absent
5170
pattern: "*.{txt,list}"
5271
ontologies:
5372
- edam: "http://edamontology.org/format_2330"
@@ -56,31 +75,30 @@ output:
5675
- - meta:
5776
type: map
5877
description: |
59-
Groovy Map containing GRM-partition job metadata
60-
e.g. `[ id:'gcta_grm', part_gcta_job:1, nparts_gcta:2 ]`
61-
- "*.part_${meta.nparts_gcta}_${meta.part_gcta_job}.grm.id":
78+
Groovy Map containing GRM-partition sample metadata
79+
e.g. `[ id:'gcta_grm' ]`
80+
- "*.part_${nparts}_${part}.grm.*":
6281
type: file
63-
description: Partitioned GRM ID file
64-
pattern: "*.part_${meta.nparts_gcta}_${meta.part_gcta_job}.grm.id"
65-
ontologies: []
66-
- "*.part_${meta.nparts_gcta}_${meta.part_gcta_job}.grm.bin":
67-
type: file
68-
description: Partitioned GRM binary matrix file
69-
pattern: "*.part_${meta.nparts_gcta}_${meta.part_gcta_job}.grm.bin"
70-
ontologies: []
71-
- "*.part_${meta.nparts_gcta}_${meta.part_gcta_job}.grm.N.bin":
72-
type: file
73-
description: Partitioned GRM sample-count matrix file
74-
pattern: "*.part_${meta.nparts_gcta}_${meta.part_gcta_job}.grm.N.bin"
82+
description: Partitioned GRM output files, including ID, binary matrix,
83+
and sample-count matrix files
84+
pattern: "*.part_${nparts}_${part}.grm.*"
7585
ontologies: []
86+
- nparts_gcta:
87+
type: integer
88+
description: Total number of GRM partitions requested via
89+
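`--make-grm-part`; emitted exactly as received on input, so it is `null` when `null` was supplied and the default of `1` was applied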
90+
- part_gcta_job:
91+
type: integer
92+
description: One-based index of the GRM partition computed via
93+
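`--make-grm-part`; emitted exactly as received on input, so it is `null` when `null` was supplied and the default of `1` was applied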
7694
versions_gcta:
77-
- - "${task.process}":
95+
- - ${task.process}:
7896
type: string
7997
description: The process the versions were collected from
80-
- "gcta":
98+
- gcta:
8199
type: string
82100
description: The tool name
83-
- "gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//'":
101+
- "gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'":
84102
type: eval
85103
description: The command used to generate the version of the tool
86104
topics:
@@ -91,10 +109,10 @@ topics:
91109
- gcta:
92110
type: string
93111
description: The tool name
94-
- gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//':
112+
- "gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'":
95113
type: eval
96114
description: The command used to generate the version of the tool
97115
authors:
98-
- "@andongni"
116+
- "@lyh970817"
99117
maintainers:
100-
- "@andongni"
118+
- "@lyh970817"

modules/nf-core/gcta/makegrmpart/tests/main.nf.test

Lines changed: 76 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -10,15 +10,15 @@ nextflow_process {
1010
tag "gcta/makegrmpart"
1111

1212
test("homo_sapiens popgen - plink2") {
13-
config "./nextflow.config"
14-
1513
when {
1614
process {
1715
"""
1816
file('gcta_grm.mpfile').text = 'plink_simulated plink_simulated.pgen plink_simulated.psam plink_simulated.pvar\\n'
1917

2018
input[0] = [
21-
[ id:'gcta_grm', part_gcta_job:1, nparts_gcta:2 ],
19+
[ id:'gcta_grm' ],
20+
2,
21+
1,
2222
file('gcta_grm.mpfile'),
2323
[
2424
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.pgen', checkIfExists: true)
@@ -40,6 +40,10 @@ nextflow_process {
4040
{ assert process.success },
4141
{ assert process.out.grm_files.size() == 1 },
4242
{ assert process.out.grm_files.get(0).get(0).id == 'gcta_grm' },
43+
{ assert process.out.grm_files.get(0).get(0).keySet() == ['id'] as Set },
44+
{ assert process.out.grm_files.get(0).get(1).size() == 3 },
45+
{ assert process.out.grm_files.get(0).get(2) == 2 },
46+
{ assert process.out.grm_files.get(0).get(3) == 1 },
4347
{
4448
assert snapshot(
4549
process.out.grm_files,
@@ -51,15 +55,15 @@ nextflow_process {
5155
}
5256

5357
test("homo_sapiens popgen - plink1") {
54-
config "./nextflow.config"
55-
5658
when {
5759
process {
5860
"""
5961
file('gcta_grm.mbfile').text = 'plink_simulated\\n'
6062

6163
input[0] = [
62-
[ id:'gcta_grm_bed', part_gcta_job:1, nparts_gcta:2 ],
64+
[ id:'gcta_grm_bed' ],
65+
2,
66+
1,
6367
file('gcta_grm.mbfile'),
6468
[
6569
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true)
@@ -81,6 +85,10 @@ nextflow_process {
8185
{ assert process.success },
8286
{ assert process.out.grm_files.size() == 1 },
8387
{ assert process.out.grm_files.get(0).get(0).id == 'gcta_grm_bed' },
88+
{ assert process.out.grm_files.get(0).get(0).keySet() == ['id'] as Set },
89+
{ assert process.out.grm_files.get(0).get(1).size() == 3 },
90+
{ assert process.out.grm_files.get(0).get(2) == 2 },
91+
{ assert process.out.grm_files.get(0).get(3) == 1 },
8492
{
8593
assert snapshot(
8694
process.out.grm_files,
@@ -92,8 +100,6 @@ nextflow_process {
92100
}
93101

94102
test("homo_sapiens popgen - plink1 - extract snp group") {
95-
config "./nextflow.config"
96-
97103
when {
98104
process {
99105
"""
@@ -107,7 +113,9 @@ nextflow_process {
107113
file('snp_group_extract.txt').text = extractSnps
108114

109115
input[0] = [
110-
[ id:'gcta_grm_bed_extract', part_gcta_job:1, nparts_gcta:2 ],
116+
[ id:'gcta_grm_bed_extract' ],
117+
2,
118+
1,
111119
file('gcta_grm_extract.mbfile'),
112120
[
113121
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true)
@@ -119,7 +127,7 @@ nextflow_process {
119127
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true)
120128
]
121129
]
122-
input[1] = [[ id:'snp_group_extract', snp_group:1 ], file('snp_group_extract.txt')]
130+
input[1] = [[ id:'snp_group_extract' ], file('snp_group_extract.txt')]
123131
"""
124132
}
125133
}
@@ -129,6 +137,10 @@ nextflow_process {
129137
{ assert process.success },
130138
{ assert process.out.grm_files.size() == 1 },
131139
{ assert process.out.grm_files.get(0).get(0).id == 'gcta_grm_bed_extract' },
140+
{ assert process.out.grm_files.get(0).get(0).keySet() == ['id'] as Set },
141+
{ assert process.out.grm_files.get(0).get(1).size() == 3 },
142+
{ assert process.out.grm_files.get(0).get(2) == 2 },
143+
{ assert process.out.grm_files.get(0).get(3) == 1 },
132144
{
133145
assert snapshot(
134146
process.out.grm_files,
@@ -139,17 +151,69 @@ nextflow_process {
139151
}
140152
}
141153

154+
test("homo_sapiens popgen - plink1 - default partition values") {
155+
when {
156+
process {
157+
"""
158+
file('gcta_grm_default.mbfile').text = 'plink_simulated\\n'
159+
160+
input[0] = [
161+
[ id:'gcta_grm_bed_default' ],
162+
null,
163+
null,
164+
file('gcta_grm_default.mbfile'),
165+
[
166+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true)
167+
],
168+
[
169+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true)
170+
],
171+
[
172+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true)
173+
]
174+
]
175+
input[1] = [[ id:'snp_group0' ], []]
176+
"""
177+
}
178+
}
179+
180+
then {
181+
assertAll(
182+
{ assert process.success },
183+
{ assert process.out.grm_files.size() == 1 },
184+
{ assert process.out.grm_files.get(0).get(0).id == 'gcta_grm_bed_default' },
185+
{ assert process.out.grm_files.get(0).get(0).keySet() == ['id'] as Set },
186+
{ assert process.out.grm_files.get(0).get(1).size() == 3 },
187+
{
188+
assert process.out.grm_files.get(0).get(1).collect { it.toString().tokenize('/').last() }.toSet() == [
189+
'gcta_grm_bed_default.part_1_1.grm.id',
190+
'gcta_grm_bed_default.part_1_1.grm.bin',
191+
'gcta_grm_bed_default.part_1_1.grm.N.bin'
192+
] as Set
193+
},
194+
{ assert process.out.grm_files.get(0).get(2) == null },
195+
{ assert process.out.grm_files.get(0).get(3) == null },
196+
{
197+
assert snapshot(
198+
process.out.findAll { key, val -> key.startsWith('versions') }
199+
).match()
200+
}
201+
)
202+
}
203+
}
204+
142205
test("homo_sapiens popgen - plink1 - stub") {
143206
options "-stub"
144-
config "./nextflow.config"
145207

146208
when {
147209
process {
148210
"""
149211
file('gcta_grm.mbfile').text = 'plink_simulated\\n'
150212

151213
input[0] = [
152-
[ id:'gcta_grm_bed', part_gcta_job:1, nparts_gcta:2 ],
214+
[ id:'gcta_grm_bed' ],
215+
2,
216+
1,
153217
file('gcta_grm.mbfile'),
154218
[
155219
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true)

0 commit comments

Comments
 (0)