Skip to content

Commit a2a56fe

Browse files
committed
Add gcta/fastgwa module
1 parent 4b801f9 commit a2a56fe

17 files changed

Lines changed: 1437 additions & 0 deletions

File tree

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
# Conda environment for the gcta/fastgwa module.
# Channel order matters: conda-forge is searched before bioconda.
channels:
  - conda-forge
  - bioconda
# GCTA is pinned to an exact version so runs are reproducible.
dependencies:
  - bioconda::gcta=1.94.1
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
// Runs a GCTA fastGWA association analysis on PLINK genotypes (bed/bim/fam or
// pgen/pvar/psam), with an optional sparse GRM and optional covariate files.
//
// Inputs:
//   bed_pgen / bim_pvar / fam_psam  genotype file set; the prefix handed to GCTA
//                                   is taken from bed_pgen's basename
//   sparse_grm_files                sparse GRM files, or [] when absent; GCTA is
//                                   given meta2.id as the GRM file prefix
//   phenotype_file                  phenotype table passed to --pheno
//   quant_covariates_file           optional, passed to --qcovar ([] to skip)
//   cat_covariates_file             optional, passed to --covar ([] to skip)
//
// Outputs:
//   results        <prefix>.fastGWA association table
//   log            <prefix>.log written by GCTA
//   versions_gcta  GCTA version, published on the `versions` topic
process GCTA_FASTGWA {
    tag "${meta.id}"
    label 'process_medium'

    conda "${moduleDir}/environment.yml"
    container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
        ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/46/46b0d05f0daa47561d87d2a9cac5e51edc2c78e26f1bbab439c688386241a274/data'
        : 'community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9'}"

    input:
    tuple val(meta), path(bed_pgen), path(bim_pvar), path(fam_psam)
    tuple val(meta2), path(sparse_grm_files)
    tuple val(meta3), path(phenotype_file)
    tuple val(meta4), path(quant_covariates_file)
    tuple val(meta5), path(cat_covariates_file)

    output:
    tuple val(meta), path("*.fastGWA"), emit: results
    tuple val(meta), path("*.log"), emit: log
    tuple val("${task.process}"), val("gcta"), eval("gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'"), emit: versions_gcta, topic: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // PLINK2 (.pgen) input is loaded with --pfile, PLINK1 (.bed) with --bfile.
    def genotype_flag = bed_pgen.extension == 'pgen' ? '--pfile' : '--bfile'
    def genotype_prefix = bed_pgen.baseName
    // Optional inputs arrive as [] when absent and then contribute no argument.
    def grm_arg = sparse_grm_files ? "--grm-sparse ${meta2.id}" : ''
    // The mixed linear model needs the sparse GRM. Without one, use fastGWA's
    // linear-regression mode rather than calling --fastGWA-mlm with no GRM.
    def model_flag = sparse_grm_files ? '--fastGWA-mlm' : '--fastGWA-lr'
    def qcovar_arg = quant_covariates_file ? "--qcovar ${quant_covariates_file}" : ''
    def covar_arg = cat_covariates_file ? "--covar ${cat_covariates_file}" : ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    def extra_args = task.ext.args ?: ''

    """
    set -euo pipefail

    gcta \\
        ${genotype_flag} ${genotype_prefix} \\
        ${grm_arg} \\
        ${model_flag} \\
        --pheno ${phenotype_file} \\
        ${qcovar_arg} \\
        ${covar_arg} \\
        --thread-num ${task.cpus} \\
        --out ${prefix} ${extra_args}
    """

    stub:
    // Create empty placeholders for both declared file outputs.
    def prefix = task.ext.prefix ?: "${meta.id}"
    """
    touch ${prefix}.fastGWA
    touch ${prefix}.log
    """
}
Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
# Module metadata for gcta/fastgwa: describes the tool, every input channel
# (in the order the process declares them) and every output channel.
name: "gcta_fastgwa"
description: Run GCTA fastGWA mixed linear model association analysis with PLINK genotype inputs
keywords:
  - gcta
  - genome-wide complex trait analysis
  - fastgwa
  - fast genome-wide association
  - gwas
  - genome-wide association study
  - genetics
tools:
  - "gcta":
      description: "Genome-wide Complex Trait Analysis (GCTA) estimates genetic relationships, variance components, and association statistics from genome-wide data."
      homepage: "https://yanglab.westlake.edu.cn/software/gcta/"
      documentation: "https://yanglab.westlake.edu.cn/software/gcta/static/gcta_doc_latest.pdf"
      tool_dev_url: "https://yanglab.westlake.edu.cn/software/gcta/"
      licence: ["GPL-3.0-only"]
      identifier: "biotools:gcta"
input:
  # Channel 1: PLINK genotype file set (PLINK1 bed/bim/fam or PLINK2 pgen/pvar/psam)
  - - meta:
        type: map
        description: |
          Groovy map containing PLINK genotype metadata
          e.g. `[ id:'plink_simulated' ]`
    - bed_pgen:
        type: file
        description: PLINK primary genotype file, either `.bed` or `.pgen`; the variant and sample files must share its basename
        pattern: "*.{bed,pgen}"
        # No EDAM term is attached: format_3003 is the UCSC BED annotation
        # format, not the PLINK binary genotype format.
        ontologies: []
    - bim_pvar:
        type: file
        description: PLINK variant information file, either `.bim` or `.pvar`
        pattern: "*.{bim,pvar}"
        ontologies: []
    - fam_psam:
        type: file
        description: PLINK sample information file, either `.fam` or `.psam`
        pattern: "*.{fam,psam}"
        ontologies: []
  # Channel 2: optional sparse GRM
  - - meta2:
        type: map
        description: |
          Groovy map containing sparse GRM metadata
          e.g. `[ id:'plink_simulated_sp' ]`
          When sparse GRM files are supplied, `id` is passed to GCTA as the GRM file prefix
    - sparse_grm_files:
        type: file
        description: Sparse GRM files (`.grm.id` and `.grm.sp`) named with `meta2.id` as prefix, pass `[]` when absent
        pattern: "*.grm.{id,sp}"
        ontologies: []
  # Channel 3: phenotype
  - - meta3:
        type: map
        description: |
          Groovy map containing phenotype metadata
          Keep only stable phenotype metadata in this map
          e.g. `[ id:'plink_simulated' ]`
    - phenotype_file:
        type: file
        description: Phenotype file
        pattern: "*.{phe,pheno,txt,tsv}"
        ontologies:
          - edam: "http://edamontology.org/format_3475" # TSV
  # Channel 4: optional quantitative covariates
  - - meta4:
        type: map
        description: |
          Groovy map containing quantitative covariate metadata
          e.g. `[ id:'covariates_quant' ]`
    - quant_covariates_file:
        type: file
        description: Quantitative covariates file, pass `[]` when absent
        pattern: "*.{covar,cov,txt,tsv}"
        ontologies:
          - edam: "http://edamontology.org/format_3475" # TSV
  # Channel 5: optional categorical covariates
  - - meta5:
        type: map
        description: |
          Groovy map containing categorical covariate metadata
          e.g. `[ id:'covariates_cat' ]`
    - cat_covariates_file:
        type: file
        description: Categorical covariates file, pass `[]` when absent
        pattern: "*.{covar,cov,txt,tsv}"
        ontologies:
          - edam: "http://edamontology.org/format_3475" # TSV
output:
  results:
    - - meta:
          type: map
          description: |
            Groovy map containing PLINK genotype metadata
            e.g. `[ id:'plink_simulated' ]`
      - "*.fastGWA":
          type: file
          description: FastGWA association results
          pattern: "*.fastGWA"
          ontologies:
            - edam: "http://edamontology.org/format_3475" # TSV
  log:
    - - meta:
          type: map
          description: |
            Groovy map containing PLINK genotype metadata
            e.g. `[ id:'plink_simulated' ]`
      - "*.log":
          type: file
          description: GCTA fastGWA log file
          pattern: "*.log"
          ontologies:
            - edam: "http://edamontology.org/format_2330" # Textual format
  versions_gcta:
    - - "${task.process}":
          type: string
          description: The process the version was collected from
      - "gcta":
          type: string
          description: The tool name
      - "gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'":
          type: eval
          description: The command used to retrieve the GCTA version
topics:
  versions:
    - - "${task.process}":
          type: string
          description: The process the version was collected from
      - "gcta":
          type: string
          description: The tool name
      - "gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'":
          type: eval
          description: The command used to retrieve the GCTA version
authors:
  - "@lyh970817"
maintainers:
  - "@lyh970817"

0 commit comments

Comments
 (0)