Skip to content

Commit 2556ab9

Browse files
committed
Restore gcta/makegrmpart reference module
1 parent b910bbf commit 2556ab9

5 files changed

Lines changed: 582 additions & 0 deletions

File tree

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
# Conda environment for the gcta/makegrmpart module.
channels:
  - conda-forge
  - bioconda
dependencies:
  # Same GCTA release (1.94.1) as the container images referenced in main.nf.
  - bioconda::gcta=1.94.1
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
// Computes one partition of a GCTA genetic relationship matrix (GRM) by
// running `gcta --make-grm-part` against a multi-file genotype manifest.
//
// Inputs:
//   meta            - Groovy map; `meta.id` is used for the tag and the default output prefix
//   nparts_gcta     - total number of partitions (falls back to 1 when unset)
//   part_gcta_job   - index of the partition to compute (falls back to 1 when unset)
//   mfile           - manifest passed to `--mbfile` / `--mpfile`
//   bed_pgen        - primary genotype file(s); the extension of the first one selects the flag
//   bim_pvar,
//   fam_psam        - staged alongside the genotypes; not referenced in the command line
//   snp_group_file  - optional file passed to `--extract`; pass `[]` to omit it
//
// Outputs:
//   grm_files       - `*.part_<nparts>_<part>.grm.*` plus the partition inputs as received
//   versions_gcta   - process name, tool name and the evaluated GCTA version (topic: versions)
process GCTA_MAKEGRMPART {
    // Apply the same fallback to 1 that the script uses, so the tag never reads "part null of null".
    tag "${meta.id}: part ${part_gcta_job ?: 1} of ${nparts_gcta ?: 1}"
    label 'process_medium'
    conda "${moduleDir}/environment.yml"
    container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
        ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/46/46b0d05f0daa47561d87d2a9cac5e51edc2c78e26f1bbab439c688386241a274/data'
        : 'community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9'}"

    input:
    tuple val(meta), val(nparts_gcta), val(part_gcta_job), path(mfile), path(bed_pgen), path(bim_pvar), path(fam_psam)
    tuple val(meta2), path(snp_group_file)

    output:
    // `nparts` and `part` are the defaulted values assigned in the script/stub section.
    tuple val(meta), path("*.part_${nparts}_${part}.grm.*"), val(nparts_gcta), val(part_gcta_job), emit: grm_files
    tuple val("${task.process}"), val("gcta"), eval("gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'"), emit: versions_gcta, topic: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // Assigned without `def` on purpose: the output glob interpolates these
    // two names, so they must not be local to this section.
    nparts = nparts_gcta ?: 1
    part = part_gcta_job ?: 1
    def extract_cmd = snp_group_file ? "--extract ${snp_group_file}" : ''
    def extra_args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // A single staged file is not a List, so normalise before indexing.
    def genotype_files = bed_pgen instanceof List ? bed_pgen : [bed_pgen]
    // An empty list would otherwise surface as a null dereference on `.name` below.
    if (!genotype_files) {
        error("GCTA_MAKEGRMPART: no genotype files (.bed/.pgen) were provided for '${meta.id}'")
    }
    // The extension of the first genotype file decides which manifest flag to use.
    def genotype_extension = genotype_files[0].name.tokenize('.').last()
    def multi_file_flag = genotype_extension == 'pgen' ? '--mpfile' : '--mbfile'

    """
    gcta \\
        ${multi_file_flag} ${mfile} \\
        --make-grm-part ${nparts} ${part} \\
        ${extract_cmd} \\
        --thread-num ${task.cpus} \\
        --out ${prefix} ${extra_args}
    """

    stub:
    // Same non-`def` assignments as in `script:` so the output glob resolves in stub runs.
    nparts = nparts_gcta ?: 1
    part = part_gcta_job ?: 1
    def prefix = task.ext.prefix ?: "${meta.id}"
    """
    touch ${prefix}.part_${nparts}_${part}.grm.id
    touch ${prefix}.part_${nparts}_${part}.grm.bin
    touch ${prefix}.part_${nparts}_${part}.grm.N.bin
    """
}
Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
# Metadata for the GCTA_MAKEGRMPART process: identity, search keywords and
# the underlying tool.
name: "gcta_makegrmpart"
description: Compute one partition of a GCTA genetic relationship matrix
keywords:
  - gcta
  - genome-wide complex trait analysis
  - grm
  - genetic relationship matrix
  - genetics
tools:
  - "gcta":
      description: "GCTA is a tool for genome-wide complex trait analysis."
      homepage: "https://yanglab.westlake.edu.cn/software/gcta/"
      documentation: "https://yanglab.westlake.edu.cn/software/gcta/static/gcta_doc_latest.pdf"
      tool_dev_url: "https://github.com/jianyangqt/gcta"
      licence:
        - "GPL-3.0-only"
      identifier: biotools:gcta
19+
# Two input channels, in the order the process declares them:
#   1. partition parameters plus the genotype manifest and its files
#   2. the optional SNP-extraction list
input:
  - - meta:
        type: map
        description: |
          Groovy Map containing GRM-partition sample metadata
          e.g. `[ id:'gcta_grm' ]`
    - nparts_gcta:
        type: integer
        description: Total number of GRM partitions requested via
          `--make-grm-part`; defaults to `1` when `null`
        default: 1
    - part_gcta_job:
        type: integer
        description: One-based index of the GRM partition to compute via
          `--make-grm-part`; defaults to `1` when `null`
        default: 1
    - mfile:
        type: file
        description: GCTA multi-input manifest consumed by `--mbfile` or
          `--mpfile`
        pattern: "*.{mbfile,mpfile,txt}"
        ontologies:
          - edam: "http://edamontology.org/format_2330" # Textual format
    - bed_pgen:
        type: file
        description: Collection of PLINK primary genotype files referenced by the
          multi-input manifest
        pattern: "*.{bed,pgen}"
        # No EDAM term is attached: format_3003 is the genome-annotation BED
        # track format, not the PLINK binary genotype formats matched here.
        ontologies: []
    - bim_pvar:
        type: file
        description: Collection of PLINK variant metadata files referenced by the
          multi-input manifest
        pattern: "*.{bim,pvar}"
        ontologies: []
    - fam_psam:
        type: file
        description: Collection of PLINK sample metadata files referenced by the
          multi-input manifest
        pattern: "*.{fam,psam}"
        ontologies: []
  - - meta2:
        type: map
        description: |
          Groovy Map containing SNP-selection metadata
          e.g. `[ id:'snp_group1' ]`
    - snp_group_file:
        type: file
        description: Optional SNP extraction file passed to `--extract`; provide
          `[]` when absent
        pattern: "*.{txt,list}"
        ontologies:
          - edam: "http://edamontology.org/format_2330" # Textual format
73+
# Output channels, keyed by their `emit:` names in main.nf.
output:
  grm_files:
    - - meta:
          type: map
          description: |
            Groovy Map containing GRM-partition sample metadata
            e.g. `[ id:'gcta_grm' ]`
      # This key must stay identical to the path glob declared in main.nf.
      - "*.part_${nparts}_${part}.grm.*":
          type: file
          description: >-
            Partitioned GRM output files, including ID, binary matrix,
            and sample-count matrix files
          pattern: "*.part_${nparts}_${part}.grm.*"
          ontologies: []
      - nparts_gcta:
          type: integer
          description: >-
            Total number of GRM partitions requested via
            `--make-grm-part`
      - part_gcta_job:
          type: integer
          description: >-
            One-based index of the GRM partition computed via
            `--make-grm-part`
  versions_gcta:
    - - ${task.process}:
          type: string
          description: The process the versions were collected from
      - gcta:
          type: string
          description: The tool name
      - "gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'":
          type: eval
          description: The command used to generate the version of the tool
104+
# Topic channels; `versions` repeats the tuple emitted as `versions_gcta`.
topics:
  versions:
    - - ${task.process}:
          type: string
          description: The process the versions were collected from
      - gcta:
          type: string
          description: The tool name
      - "gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'":
          type: eval
          description: The command used to generate the version of the tool
115+
# GitHub handles; quoted because a plain YAML scalar cannot start with `@`.
authors:
  - "@lyh970817"
maintainers:
  - "@lyh970817"

0 commit comments

Comments
 (0)