Skip to content

Commit 27c0789

Browse files
committed
Upgrade to Nextflow 26.04 [wip]
1 parent ddcab5d commit 27c0789

18 files changed

Lines changed: 390 additions & 397 deletions

File tree

main.nf

Lines changed: 42 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -9,43 +9,39 @@
99
----------------------------------------------------------------------------------------
1010
*/
1111

12-
nextflow.preview.output = true
12+
nextflow.preview.types = true
1313

1414
/*
1515
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
16-
IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS / WORKFLOWS
16+
IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS / WORKFLOWS / TYPES
1717
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1818
*/
1919

2020
include { SRA } from './workflows/sra'
2121
include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_fetchngs_pipeline'
2222
include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_fetchngs_pipeline'
23-
include { softwareVersionsToYAML } from './subworkflows/nf-core/utils_nfcore_pipeline'
23+
include { SOFTWARE_VERSIONS } from './subworkflows/nf-core/utils_nfcore_pipeline'
24+
include { Sample } from './workflows/sra'
2425

2526
/*
2627
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
27-
NAMED WORKFLOWS FOR PIPELINE
28+
WORKFLOW INPUTS
2829
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2930
*/
3031

31-
//
32-
// WORKFLOW: Run main nf-core/fetchngs analysis pipeline depending on type of identifier provided
33-
//
34-
workflow NFCORE_FETCHNGS {
32+
params {
3533

36-
take:
37-
ids // channel: database ids read in from --input
34+
// File listing the SRA/ENA/GEO/DDBJ identifiers whose metadata and FastQ files should be downloaded
35+
input: Path
3836

39-
main:
37+
// Comma-separated list of ENA metadata fields to fetch before downloading data
38+
ena_metadata_fields: String = ''
4039

41-
//
42-
// WORKFLOW: Download FastQ files for SRA / ENA / GEO / DDBJ ids
43-
//
44-
SRA ( ids )
40+
// Only download metadata for the public database ids; do not download the FastQ files
41+
skip_fastq_download: Boolean = false
4542

46-
emit:
47-
samples = SRA.out.samples
48-
metadata = SRA.out.metadata
43+
// dbGaP repository key
44+
dbgap_key: Path?
4945
}
5046

5147
/*
@@ -60,42 +56,57 @@ workflow {
6056
//
6157
// SUBWORKFLOW: Run initialisation tasks
6258
//
63-
PIPELINE_INITIALISATION (
59+
ids = PIPELINE_INITIALISATION (
6460
params.version,
6561
params.validate_params,
6662
params.monochrome_logs,
6763
args,
68-
params.outdir,
64+
workflow.outputDir,
6965
params.input,
7066
params.ena_metadata_fields
7167
)
7268

7369
//
7470
// WORKFLOW: Run primary workflows for the pipeline
7571
//
76-
NFCORE_FETCHNGS (
77-
PIPELINE_INITIALISATION.out.ids
72+
sra = SRA (
73+
channel.fromList(ids),
74+
params
7875
)
76+
77+
//
78+
// SUBWORKFLOW: Collect software versions
79+
//
80+
versions = SOFTWARE_VERSIONS()
81+
7982
//
8083
// SUBWORKFLOW: Run completion tasks
8184
//
8285
PIPELINE_COMPLETION (
8386
params.email,
8487
params.email_on_fail,
8588
params.plaintext_email,
86-
params.outdir,
89+
workflow.outputDir,
8790
params.monochrome_logs,
8891
params.hook_url
8992
)
9093

9194
publish:
92-
samples = NFCORE_FETCHNGS.out.samples
93-
metadata = NFCORE_FETCHNGS.out.metadata
94-
versions = softwareVersionsToYAML()
95+
samples = sra.samples
96+
runinfo_ftp = sra.runinfo_ftp
97+
versions = versions
9598
}
9699

100+
/*
101+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
102+
WORKFLOW OUTPUTS
103+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
104+
*/
105+
97106
output {
98-
samples {
107+
108+
// List of FASTQ samples with optional MD5 checksums
109+
samples: Channel<Sample> {
99110
path { sample ->
100111
sample.fastq_1 >> 'fastq/'
101112
sample.fastq_2 >> 'fastq/'
@@ -107,12 +118,13 @@ output {
107118
}
108119
}
109120

110-
metadata {
121+
// List of download links for the given sample ids
122+
runinfo_ftp: Channel<Path> {
111123
path 'metadata'
112124
}
113125

114-
versions {
115-
path '.'
126+
// Manifest of tool versions used by the pipeline for MultiQC
127+
versions: Map<String,Map> {
116128
index {
117129
path 'nf_core_fetchngs_software_mqc_versions.yml'
118130
}

modules/local/aspera_cli/main.nf

Lines changed: 32 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
1+
nextflow.preview.types = true
2+
13
process ASPERA_CLI {
2-
tag "$meta.id"
4+
tag id
35
label 'process_medium'
46

57
conda "${moduleDir}/environment.yml"
@@ -8,29 +10,43 @@ process ASPERA_CLI {
810
'biocontainers/aspera-cli:4.14.0--hdfd78af_1' }"
911

1012
input:
11-
tuple val(meta), val(fastq)
12-
val user
13+
(
14+
id: String,
15+
single_end: Boolean,
16+
fastq_aspera: String,
17+
md5_1: String,
18+
md5_2: String?
19+
): Record
20+
user: String
1321

1422
output:
15-
tuple val(meta), path("*fastq.gz"), emit: fastq
16-
tuple val(meta), path("*md5") , emit: md5
17-
tuple val("${task.process}"), val('aspera_cli'), eval('ascli --version'), topic: versions
23+
record(
24+
id: id,
25+
fastq_1: file('*_1.fastq.gz'),
26+
fastq_2: file('*_2.fastq.gz', optional: true),
27+
md5_1: file('*_1.fastq.gz.md5'),
28+
md5_2: file('*_2.fastq.gz.md5', optional: true),
29+
)
30+
31+
topic:
32+
record(process: task.process, name: 'aspera_cli', version: eval('ascli --version')) >> 'versions'
1833

1934
script:
2035
def args = task.ext.args ?: ''
2136
def conda_prefix = ['singularity', 'apptainer'].contains(workflow.containerEngine) ? "export CONDA_PREFIX=/usr/local" : ""
22-
if (meta.single_end) {
37+
def fastq = fastq_aspera.tokenize(';')
38+
if (single_end) {
2339
"""
2440
$conda_prefix
2541
2642
ascp \\
2743
$args \\
2844
-i \$CONDA_PREFIX/etc/aspera/aspera_bypass_dsa.pem \\
2945
${user}@${fastq[0]} \\
30-
${meta.id}.fastq.gz
46+
${id}.fastq.gz
3147
32-
echo "${meta.md5_1} ${meta.id}.fastq.gz" > ${meta.id}.fastq.gz.md5
33-
md5sum -c ${meta.id}.fastq.gz.md5
48+
echo "${md5_1} ${id}.fastq.gz" > ${id}.fastq.gz.md5
49+
md5sum -c ${id}.fastq.gz.md5
3450
"""
3551
} else {
3652
"""
@@ -40,19 +56,19 @@ process ASPERA_CLI {
4056
$args \\
4157
-i \$CONDA_PREFIX/etc/aspera/aspera_bypass_dsa.pem \\
4258
${user}@${fastq[0]} \\
43-
${meta.id}_1.fastq.gz
59+
${id}_1.fastq.gz
4460
45-
echo "${meta.md5_1} ${meta.id}_1.fastq.gz" > ${meta.id}_1.fastq.gz.md5
46-
md5sum -c ${meta.id}_1.fastq.gz.md5
61+
echo "${md5_1} ${id}_1.fastq.gz" > ${id}_1.fastq.gz.md5
62+
md5sum -c ${id}_1.fastq.gz.md5
4763
4864
ascp \\
4965
$args \\
5066
-i \$CONDA_PREFIX/etc/aspera/aspera_bypass_dsa.pem \\
5167
${user}@${fastq[1]} \\
52-
${meta.id}_2.fastq.gz
68+
${id}_2.fastq.gz
5369
54-
echo "${meta.md5_2} ${meta.id}_2.fastq.gz" > ${meta.id}_2.fastq.gz.md5
55-
md5sum -c ${meta.id}_2.fastq.gz.md5
70+
echo "${md5_2} ${id}_2.fastq.gz" > ${id}_2.fastq.gz.md5
71+
md5sum -c ${id}_2.fastq.gz.md5
5672
"""
5773
}
5874
}
Lines changed: 34 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11

2+
nextflow.preview.types = true
3+
24
process SRA_FASTQ_FTP {
3-
tag "$meta.id"
5+
tag id
46
label 'process_low'
57
label 'error_retry'
68

@@ -10,42 +12,56 @@ process SRA_FASTQ_FTP {
1012
'biocontainers/wget:1.21.4' }"
1113

1214
input:
13-
tuple val(meta), val(fastq)
15+
(
16+
id: String,
17+
single_end: Boolean,
18+
fastq_1: String,
19+
fastq_2: String?,
20+
md5_1: String,
21+
md5_2: String?
22+
): Record
1423

1524
output:
16-
tuple val(meta), path("*fastq.gz"), emit: fastq
17-
tuple val(meta), path("*md5") , emit: md5
18-
tuple val("${task.process}"), val('wget'), eval("echo \$(wget --version | head -n 1 | sed 's/^GNU Wget //; s/ .*\$//')"), topic: versions
25+
record(
26+
id: id,
27+
fastq_1: file('*_1.fastq.gz'),
28+
fastq_2: file('*_2.fastq.gz'),
29+
md5_1: file('*_1.fastq.gz.md5'),
30+
md5_2: file('*_2.fastq.gz.md5'),
31+
)
32+
33+
topic:
34+
record(process: task.process, name: 'wget', version: eval("echo \$(wget --version | head -n 1 | sed 's/^GNU Wget //; s/ .*\$//')")) >> 'versions'
1935

2036
script:
2137
def args = task.ext.args ?: ''
22-
if (meta.single_end) {
38+
if (single_end) {
2339
"""
2440
wget \\
2541
$args \\
26-
-O ${meta.id}.fastq.gz \\
27-
${fastq[0]}
42+
-O ${id}.fastq.gz \\
43+
${fastq_1}
2844
29-
echo "${meta.md5_1} ${meta.id}.fastq.gz" > ${meta.id}.fastq.gz.md5
30-
md5sum -c ${meta.id}.fastq.gz.md5
45+
echo "${md5_1} ${id}.fastq.gz" > ${id}.fastq.gz.md5
46+
md5sum -c ${id}.fastq.gz.md5
3147
"""
3248
} else {
3349
"""
3450
wget \\
3551
$args \\
36-
-O ${meta.id}_1.fastq.gz \\
37-
${fastq[0]}
52+
-O ${id}_1.fastq.gz \\
53+
${fastq_1}
3854
39-
echo "${meta.md5_1} ${meta.id}_1.fastq.gz" > ${meta.id}_1.fastq.gz.md5
40-
md5sum -c ${meta.id}_1.fastq.gz.md5
55+
echo "${md5_1} ${id}_1.fastq.gz" > ${id}_1.fastq.gz.md5
56+
md5sum -c ${id}_1.fastq.gz.md5
4157
4258
wget \\
4359
$args \\
44-
-O ${meta.id}_2.fastq.gz \\
45-
${fastq[1]}
60+
-O ${id}_2.fastq.gz \\
61+
${fastq_2}
4662
47-
echo "${meta.md5_2} ${meta.id}_2.fastq.gz" > ${meta.id}_2.fastq.gz.md5
48-
md5sum -c ${meta.id}_2.fastq.gz.md5
63+
echo "${md5_2} ${id}_2.fastq.gz" > ${id}_2.fastq.gz.md5
64+
md5sum -c ${id}_2.fastq.gz.md5
4965
"""
5066
}
5167
}

modules/local/sra_ids_to_runinfo/main.nf

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11

2+
nextflow.preview.types = true
3+
24
process SRA_IDS_TO_RUNINFO {
3-
tag "$id"
5+
tag id
46
label 'error_retry'
57

68
conda "conda-forge::python=3.9.5"
@@ -9,12 +11,14 @@ process SRA_IDS_TO_RUNINFO {
911
'biocontainers/python:3.9--1' }"
1012

1113
input:
12-
val id
13-
val fields
14+
id: String
15+
fields: String
1416

1517
output:
16-
path "*.tsv" , emit: tsv
17-
tuple val("${task.process}"), val('python'), eval("python --version | sed 's/Python //g'"), topic: versions
18+
file('*.runinfo.tsv')
19+
20+
topic:
21+
record(process: task.process, name: 'python', version: eval("python --version | sed 's/Python //g'")) >> 'versions'
1822

1923
script:
2024
def metadata_fields = fields ? "--ena_metadata_fields ${fields}" : ''
Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11

2+
nextflow.preview.types = true
3+
24
process SRA_RUNINFO_TO_FTP {
35

46
conda "conda-forge::python=3.9.5"
@@ -7,16 +9,18 @@ process SRA_RUNINFO_TO_FTP {
79
'biocontainers/python:3.9--1' }"
810

911
input:
10-
path runinfo
12+
runinfo: Path
1113

1214
output:
13-
path "*.tsv" , emit: tsv
14-
tuple val("${task.process}"), val('python'), eval("python --version | sed 's/Python //g'"), topic: versions
15+
file('*.runinfo_ftp.tsv')
16+
17+
topic:
18+
record(process: task.process, name: 'python', version: eval("python --version | sed 's/Python //g'")) >> 'versions'
1519

1620
script:
1721
"""
1822
sra_runinfo_to_ftp.py \\
19-
${runinfo.join(',')} \\
20-
${runinfo.toString().tokenize(".")[0]}.runinfo_ftp.tsv
23+
${runinfo} \\
24+
${runinfo.baseName.tokenize(".")[0]}.runinfo_ftp.tsv
2125
"""
2226
}

modules/nf-core/custom/sratoolsncbisettings/main.nf

Lines changed: 7 additions & 5 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)