Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions modules/nf-core/gridss/preprocess/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
# Conda environment for the GRIDSS_PREPROCESS module.
channels:
  - conda-forge
  - bioconda
dependencies:
  # Same GRIDSS release (2.13.2) as the container images used by the module.
  - "bioconda::gridss=2.13.2"
59 changes: 59 additions & 0 deletions modules/nf-core/gridss/preprocess/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
// Runs the GRIDSS `preprocess` step on one BAM file and emits the resulting
// `*.gridss.working` directory.
//
// Inputs:
//   [meta,  bam, bai]   - sample map, input BAM and its index
//   [meta2, fasta]      - reference FASTA
//   [meta3, fasta_fai]  - reference FASTA index
//   [meta4, bwa_index]  - directory holding the BWA index files
// Outputs:
//   preprocess_dir  - [meta, <*.gridss.working directory>]
//   versions_gridss - [process name, 'gridss', version string], also sent to the `versions` topic
process GRIDSS_PREPROCESS {
    tag "$meta.id"
    label 'process_medium'

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/gridss:2.13.2--h50ea8bc_3':
        'quay.io/biocontainers/gridss:2.13.2--h50ea8bc_3' }"

    input:
    tuple val(meta), path(bam), path(bai)
    tuple val(meta2), path(fasta)
    tuple val(meta3), path(fasta_fai)
    tuple val(meta4), path(bwa_index)

    output:
    tuple val(meta), path("*.gridss.working"), emit: preprocess_dir
    tuple val("${task.process}"), val('gridss'), eval("CallVariants --version 2>&1 | sed 's/-gridss\$//'"), topic: versions, emit: versions_gridss

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    // Heap size is the task allocation minus 1 GB, clamped to at least 1 GB so
    // that allocations of 1 GB or less cannot produce a `0g`/negative value.
    // When no memory is configured (`task.memory` is null) the heap flags are
    // left out entirely instead of failing on a null dereference; GRIDSS is
    // then presumably left on its own built-in defaults - TODO confirm.
    def heap_gb   = task.memory ? Math.max(1L, task.memory.toGiga() - 1) : null
    def heap_args = heap_gb ? "--jvmheap ${heap_gb}g --otherjvmheap ${heap_gb}g" : ''
    // NOTE(review): no `prefix` is defined here because nothing in this command
    // uses it; the `*.gridss.working` directory name is not set by this script.

    """
    # Link the BWA index files from the staged index directory into the task
    # directory, next to the staged reference.
    ln -s \$(find -L ${bwa_index} -regex '.*\\.\\(amb\\|ann\\|pac\\|gridsscache\\|sa\\|bwt\\|img\\|alt\\)') ./

    gridss \\
        --threads ${task.cpus} \\
        --steps preprocess \\
        ${heap_args} \\
        --reference ${fasta} \\
        ${args} \\
        ${bam}
    """

    stub:
    def prefix = task.ext.prefix ?: "${meta.id}"
    // NOTE(review): the stub names its outputs `${prefix}.gridss.targeted.bam.*`
    // inside `${prefix}.gridss.working/`, while the recorded snapshot of the
    // real run lists files named after the input BAM (e.g.
    // `test.paired_end.sorted.bam.cigar_metrics`). Names are kept as-is here so
    // the existing stub snapshot stays valid - confirm whether the stub should
    // mirror the real layout.

    """
    mkdir -p ${prefix}.gridss.working/

    touch ${prefix}.gridss.working/${prefix}.gridss.targeted.bam.cigar_metrics
    touch ${prefix}.gridss.working/${prefix}.gridss.targeted.bam.computesamtags.changes.tsv
    touch ${prefix}.gridss.working/${prefix}.gridss.targeted.bam.coverage.blacklist.bed
    touch ${prefix}.gridss.working/${prefix}.gridss.targeted.bam.idsv_metrics
    touch ${prefix}.gridss.working/${prefix}.gridss.targeted.bam.insert_size_histogram.pdf
    touch ${prefix}.gridss.working/${prefix}.gridss.targeted.bam.insert_size_metrics
    touch ${prefix}.gridss.working/${prefix}.gridss.targeted.bam.mapq_metrics
    touch ${prefix}.gridss.working/${prefix}.gridss.targeted.bam.sv.bam
    touch ${prefix}.gridss.working/${prefix}.gridss.targeted.bam.sv.bam.csi
    touch ${prefix}.gridss.working/${prefix}.gridss.targeted.bam.tag_metrics
    """
}
100 changes: 100 additions & 0 deletions modules/nf-core/gridss/preprocess/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
# Module metadata for GRIDSS_PREPROCESS: describes the tool and documents every
# input/output channel element declared in main.nf.
name: "gridss_preprocess"
description: Run the preprocess step of GRIDSS to extract multiple Picard
  metrics (insert size, MAPQ, CIGAR, IDSV, tag and coverage metrics) from an
  input BAM file prior to assembly and variant calling.
keywords:
  - gridss
  - preprocess
  - structural variants
  - bam
tools:
  - gridss:
      description: "GRIDSS: the Genomic Rearrangement IDentification Software Suite"
      homepage: "https://github.com/PapenfussLab/gridss"
      documentation: "https://github.com/PapenfussLab/gridss/wiki/GRIDSS-Documentation"
      tool_dev_url: "https://github.com/PapenfussLab/gridss"
      doi: "10.1186/s13059-021-02423-x"
      licence:
        - "GPL v3"
      identifier: biotools:gridss
# One inner list per input tuple, in the same order as the `input:` block.
input:
  - - meta:
        type: map
        description: |
          Groovy Map containing sample information
          e.g. [ id:'test' ]
    - bam:
        type: file
        description: Input BAM file
        pattern: "*.bam"
        ontologies:
          - edam: http://edamontology.org/format_2572
    - bai:
        type: file
        description: Index of the input BAM file
        pattern: "*.bai"
        ontologies: []
  - - meta2:
        type: map
        description: |
          Groovy Map containing reference information
    - fasta:
        type: file
        description: The reference fasta
        pattern: "*.{fa,fna,fasta}"
        ontologies: []
  - - meta3:
        type: map
        description: |
          Groovy Map containing reference information
    - fasta_fai:
        type: file
        description: The index of the reference fasta
        pattern: "*.fai"
        ontologies: []
  - - meta4:
        type: map
        description: |
          Groovy Map containing reference information
    - bwa_index:
        type: directory
        description: The BWA index created from the reference fasta, will be
          generated by GRIDSS in the setupreference step
# Keyed by the `emit:` names used in main.nf.
output:
  preprocess_dir:
    - - meta:
          type: map
          description: |
            Groovy Map containing sample information
            e.g. [ id:'test' ]
      - "*.gridss.working":
          type: directory
          description: The working directory produced by the GRIDSS preprocess
            step containing Picard metrics (insert size, MAPQ, CIGAR, IDSV, tag,
            coverage) and SV-relevant reads used by downstream GRIDSS steps
          pattern: "*.gridss.working"
  versions_gridss:
    - - ${task.process}:
          type: string
          description: The name of the process
      - gridss:
          type: string
          description: The tool name
      - CallVariants --version 2>&1 | sed 's/-gridss\$//':
          type: eval
          description: The expression to obtain the version of the tool
topics:
  versions:
    - - ${task.process}:
          type: string
          description: The name of the process
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
description: The process
description: The name of the process

# Remaining elements of the `versions` topic tuple: the tool name and the
# command whose output is captured as the tool version.
- gridss:
type: string
description: The tool name
- CallVariants --version 2>&1 | sed 's/-gridss\$//':
type: eval
description: The expression to obtain the version of the tool
# GitHub handles of the module's authors and maintainers.
authors:
- "@imsarath"
maintainers:
- "@imsarath"
107 changes: 107 additions & 0 deletions modules/nf-core/gridss/preprocess/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
// nf-test suite for the GRIDSS_PREPROCESS process defined in ../main.nf.
nextflow_process {

name "Test Process GRIDSS_PREPROCESS"
script "../main.nf"
// Extra Nextflow config loaded for this suite.
config "./nextflow.config"
process "GRIDSS_PREPROCESS"

// Tags used to select this suite; "bwa/index" is listed because the setup
// block below runs the BWA_INDEX module.
tag "modules"
tag "modules_nfcore"
tag "gridss"
tag "gridss/preprocess"
tag "bwa/index"

// Runs BWA_INDEX on the test genome FASTA so that its `index` output can be
// passed to GRIDSS_PREPROCESS as the BWA index input.
setup {

run("BWA_INDEX") {
script "../../../bwa/index/main.nf"
process {
"""
input[0] = [ [id:'fasta'],
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)
]
"""
}
}
}

// Real (non-stub) run: paired-end test BAM + index, the test genome FASTA and
// .fai, and the BWA index produced by the BWA_INDEX setup step.
test("human - bam - bwa") {

when {
process {
"""
input[0] = [
[ id:'test' ],
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true)
]
input[1] = [ [id:'fasta'],
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)
]
input[2] = [ [id:'fasta_fai'],
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true)
]
input[3] = BWA_INDEX.out.index
"""
}
}

then {
// Path of the emitted working directory: second element of the first
// `preprocess_dir` tuple ([meta, dir]).
def workdir = process.out.preprocess_dir[0][1]
// Common file-name prefix of the files checked below; equals the input BAM
// file name.
def prefix = "test.paired_end.sorted.bam"
// Picard `# Started on:` and R's PDF `/CreationDate` add per-run timestamps,
// and `sv.bam`/`sv.bam.csi` BGZF blocks differ across conda/docker — snapshot only stable parts.
// Presence of `sv.bam` and `sv.bam.csi` is covered by the directory listing snapshot.
// Builds "<file name>:md5,<digest>" where the digest is computed by
// listToMD5 over the file's lines that do not start with "#".
def stripPicardHeaderMd5 = { f -> "${file(f).name}:md5,${listToMD5(path(f).readLines().findAll { !it.startsWith("#") })}" }

assertAll(
{ assert process.success },
{ assert snapshot(
// Sorted listing of the working-directory entries that start with `prefix`,
// paired with the meta map.
process.out.preprocess_dir.collect { meta, dir -> [meta, file(dir).list().findAll { it.startsWith(prefix) }.sort()] },
// Files snapshotted as-is.
path("${workdir}/${prefix}.computesamtags.changes.tsv"),
path("${workdir}/${prefix}.coverage.blacklist.bed"),
// Metrics files snapshotted with their "#" header lines excluded.
stripPicardHeaderMd5("${workdir}/${prefix}.cigar_metrics"),
stripPicardHeaderMd5("${workdir}/${prefix}.idsv_metrics"),
stripPicardHeaderMd5("${workdir}/${prefix}.insert_size_metrics"),
stripPicardHeaderMd5("${workdir}/${prefix}.mapq_metrics"),
stripPicardHeaderMd5("${workdir}/${prefix}.tag_metrics"),
Comment on lines +60 to +67

@beatrizsavinhas beatrizsavinhas Jun 15, 2026

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think lines 60-67 are actually testing the tool, not the module. The behaviour of the tool should be tested in the tool itself. For the Nextflow module, I would think checking the process outputs is sufficient.

Suggested change
process.out.preprocess_dir.collect { meta, dir -> [meta, file(dir).list().findAll { it.startsWith(prefix) }.sort()] },
path("${workdir}/${prefix}.computesamtags.changes.tsv"),
path("${workdir}/${prefix}.coverage.blacklist.bed"),
stripPicardHeaderMd5("${workdir}/${prefix}.cigar_metrics"),
stripPicardHeaderMd5("${workdir}/${prefix}.idsv_metrics"),
stripPicardHeaderMd5("${workdir}/${prefix}.insert_size_metrics"),
stripPicardHeaderMd5("${workdir}/${prefix}.mapq_metrics"),
stripPicardHeaderMd5("${workdir}/${prefix}.tag_metrics"),
process.out

Looking at the tests for cadd for example, that seems to be the case. 🤔

process.out.findAll { key, val -> key.startsWith("versions") }

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe that with nf-test 0.9.5, which the CI for this repo already has, this can be done simply by:

Suggested change
process.out.findAll { key, val -> key.startsWith("versions") }
topics

Provided that topics "versions" is added above.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or even just process.out.versions_gridss? 🤔

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

process.out.findAll { key, val -> key.startsWith("versions") } is the standard way we have been using in the repo at the moment.

).match() }
)
}
}

// Stub run (`-stub`): exercises the process's `stub:` block with the same BAM,
// FASTA and .fai inputs as the real test.
test("human - bam - stub") {

options "-stub"

when {
process {
"""
input[0] = [
[ id:'test' ],
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true)
]
input[1] = [ [id:'fasta'],
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)
]
input[2] = [ [id:'fasta_fai'],
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true)
]
input[3] = [ [], [] ]
"""
}
}

then {
assertAll(
{ assert process.success },
// Snapshot the emitted working directory plus every output channel whose
// name starts with "versions".
{ assert snapshot(
process.out.preprocess_dir,
process.out.findAll { key, val -> key.startsWith("versions") }
).match() }
)
}
}
}
83 changes: 83 additions & 0 deletions modules/nf-core/gridss/preprocess/tests/main.nf.test.snap
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
{
"human - bam - stub": {
"content": [
[
[
{
"id": "test"
},
[
"test.gridss.targeted.bam.cigar_metrics:md5,d41d8cd98f00b204e9800998ecf8427e",
"test.gridss.targeted.bam.computesamtags.changes.tsv:md5,d41d8cd98f00b204e9800998ecf8427e",
"test.gridss.targeted.bam.coverage.blacklist.bed:md5,d41d8cd98f00b204e9800998ecf8427e",
"test.gridss.targeted.bam.idsv_metrics:md5,d41d8cd98f00b204e9800998ecf8427e",
"test.gridss.targeted.bam.insert_size_histogram.pdf:md5,d41d8cd98f00b204e9800998ecf8427e",
"test.gridss.targeted.bam.insert_size_metrics:md5,d41d8cd98f00b204e9800998ecf8427e",
"test.gridss.targeted.bam.mapq_metrics:md5,d41d8cd98f00b204e9800998ecf8427e",
"test.gridss.targeted.bam.sv.bam:md5,d41d8cd98f00b204e9800998ecf8427e",
"test.gridss.targeted.bam.sv.bam.csi:md5,d41d8cd98f00b204e9800998ecf8427e",
"test.gridss.targeted.bam.tag_metrics:md5,d41d8cd98f00b204e9800998ecf8427e"
]
]
],
{
"versions_gridss": [
[
"GRIDSS_PREPROCESS",
"gridss",
"2.13.2"
]
]
}
],
"timestamp": "2026-06-12T14:30:07.433688807",
"meta": {
"nf-test": "0.9.4",
"nextflow": "26.04.3"
}
},
"human - bam - bwa": {
"content": [
[
[
{
"id": "test"
},
[
"test.paired_end.sorted.bam.cigar_metrics",
"test.paired_end.sorted.bam.computesamtags.changes.tsv",
"test.paired_end.sorted.bam.coverage.blacklist.bed",
"test.paired_end.sorted.bam.idsv_metrics",
"test.paired_end.sorted.bam.insert_size_histogram.pdf",
"test.paired_end.sorted.bam.insert_size_metrics",
"test.paired_end.sorted.bam.mapq_metrics",
"test.paired_end.sorted.bam.sv.bam",
"test.paired_end.sorted.bam.sv.bam.csi",
"test.paired_end.sorted.bam.tag_metrics"
]
]
],
"test.paired_end.sorted.bam.computesamtags.changes.tsv:md5,ce0a87ccee35f990cd878e12a8a84bae",
"test.paired_end.sorted.bam.coverage.blacklist.bed:md5,63a1da1606bf23357ad6b1b166c21651",
"test.paired_end.sorted.bam.cigar_metrics:md5,29b9a700b9da9f2cb23e199e8c15af31",
"test.paired_end.sorted.bam.idsv_metrics:md5,eeee010100dcea8b9f9eeeac44f8d142",
"test.paired_end.sorted.bam.insert_size_metrics:md5,4a7d860f1073a82c093373bdbdbd6bf3",
"test.paired_end.sorted.bam.mapq_metrics:md5,dff79a95a8f9b40de520ef438c9fab59",
"test.paired_end.sorted.bam.tag_metrics:md5,486bf2bab6f506dd20f98e7fa05ae39c",
{
"versions_gridss": [
[
"GRIDSS_PREPROCESS",
"gridss",
"2.13.2"
]
]
}
],
"timestamp": "2026-06-12T14:29:56.150511142",
"meta": {
"nf-test": "0.9.4",
"nextflow": "26.04.3"
}
}
}
6 changes: 6 additions & 0 deletions modules/nf-core/gridss/preprocess/tests/nextflow.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
// Test-only process settings for the BWA_INDEX setup step.
process {
    withName: 'BWA_INDEX' {
        // Name the index files after the complete FASTA file name
        // ("genome.fasta"), which is the prefix GRIDSS requires.
        ext.prefix = { "genome.fasta" }
    }
}