-
Notifications
You must be signed in to change notification settings - Fork 1.1k
Add new component: gridss/preprocess #11988
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,7 @@ | ||
| --- | ||
| # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json | ||
| channels: | ||
| - conda-forge | ||
| - bioconda | ||
| dependencies: | ||
| - "bioconda::gridss=2.13.2" |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,59 @@ | ||
| process GRIDSS_PREPROCESS { | ||
| // Runs only the `preprocess` step of GRIDSS on a single BAM and emits the | ||
| // resulting `*.gridss.working` directory plus the tool version. | ||
| tag "$meta.id" | ||
| label 'process_medium' | ||
|
|
||
| conda "${moduleDir}/environment.yml" | ||
| container "${ workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container ? | ||
| 'https://depot.galaxyproject.org/singularity/gridss:2.13.2--h50ea8bc_3': | ||
| 'quay.io/biocontainers/gridss:2.13.2--h50ea8bc_3' }" | ||
|
|
||
| input: | ||
| // meta: sample map; meta2..meta4: maps accompanying the reference inputs | ||
| tuple val(meta), path(bam), path(bai) | ||
| tuple val(meta2), path(fasta) | ||
| tuple val(meta3), path(fasta_fai) | ||
| tuple val(meta4), path(bwa_index) | ||
|
|
||
| output: | ||
| tuple val(meta), path("*.gridss.working"), emit: preprocess_dir | ||
| // Version string is taken from `CallVariants --version` with a trailing "-gridss" stripped | ||
| tuple val("${task.process}"), val('gridss'), eval("CallVariants --version 2>&1 | sed 's/-gridss\$//'"), topic: versions, emit: versions_gridss | ||
|
|
||
| when: | ||
| task.ext.when == null || task.ext.when | ||
|
|
||
| script: | ||
| def args = task.ext.args ?: '' | ||
| // Request 1 GB less than the task allocation for the JVM heaps, but never less | ||
| // than 1 GB. When no memory is configured for the task, omit both heap flags | ||
| // so that GRIDSS uses its own defaults instead of failing on a null task.memory. | ||
| def heap_gb = task.memory ? Math.max(task.memory.toGiga() - 1, 1) : null | ||
| def jvm_heap_args = heap_gb ? "--jvmheap ${heap_gb}g --otherjvmheap ${heap_gb}g" : '' | ||
|
|
||
| """ | ||
| ln -s \$(find -L ${bwa_index} -regex '.*\\.\\(amb\\|ann\\|pac\\|gridsscache\\|sa\\|bwt\\|img\\|alt\\)') ./ | ||
|
|
||
| gridss \\ | ||
| --threads ${task.cpus} \\ | ||
| --steps preprocess \\ | ||
| ${jvm_heap_args} \\ | ||
| --reference ${fasta} \\ | ||
| ${args} \\ | ||
| ${bam} | ||
|
|
||
| """ | ||
|
|
||
| stub: | ||
| def prefix = task.ext.prefix ?: "${meta.id}" | ||
| // NOTE(review): the stub names the directory and files after `prefix`, whereas the | ||
| // real-run test snapshot lists files named after the input BAM — confirm this is intended. | ||
|
|
||
| """ | ||
| mkdir -p ${prefix}.gridss.working/ | ||
|
|
||
| touch ${prefix}.gridss.working/${prefix}.gridss.targeted.bam.cigar_metrics | ||
| touch ${prefix}.gridss.working/${prefix}.gridss.targeted.bam.computesamtags.changes.tsv | ||
| touch ${prefix}.gridss.working/${prefix}.gridss.targeted.bam.coverage.blacklist.bed | ||
| touch ${prefix}.gridss.working/${prefix}.gridss.targeted.bam.idsv_metrics | ||
| touch ${prefix}.gridss.working/${prefix}.gridss.targeted.bam.insert_size_histogram.pdf | ||
| touch ${prefix}.gridss.working/${prefix}.gridss.targeted.bam.insert_size_metrics | ||
| touch ${prefix}.gridss.working/${prefix}.gridss.targeted.bam.mapq_metrics | ||
| touch ${prefix}.gridss.working/${prefix}.gridss.targeted.bam.sv.bam | ||
| touch ${prefix}.gridss.working/${prefix}.gridss.targeted.bam.sv.bam.csi | ||
| touch ${prefix}.gridss.working/${prefix}.gridss.targeted.bam.tag_metrics | ||
| """ | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,100 @@ | ||
| name: "gridss_preprocess" | ||
| description: Run the preprocess step of GRIDSS to extract multiple Picard | ||
| metrics (insert size, MAPQ, CIGAR, IDSV, tag and coverage metrics) from an | ||
| input BAM file prior to assembly and variant calling. | ||
| keywords: | ||
| - gridss | ||
| - preprocess | ||
| - structural variants | ||
| - bam | ||
| tools: | ||
| - gridss: | ||
| description: "GRIDSS: the Genomic Rearrangement IDentification Software Suite" | ||
| homepage: "https://github.com/PapenfussLab/gridss" | ||
| documentation: "https://github.com/PapenfussLab/gridss/wiki/GRIDSS-Documentation" | ||
| tool_dev_url: "https://github.com/PapenfussLab/gridss" | ||
| doi: "10.1186/s13059-021-02423-x" | ||
| licence: | ||
| - "GPL v3" | ||
| identifier: biotools:gridss | ||
| input: | ||
| - - meta: | ||
| type: map | ||
| description: | | ||
| Groovy Map containing sample information | ||
| e.g. [ id:'test' ] | ||
| - bam: | ||
| type: file | ||
| description: Input BAM file | ||
| pattern: "*.bam" | ||
| ontologies: | ||
| - edam: http://edamontology.org/format_2572 | ||
| - bai: | ||
| type: file | ||
| description: Index of the input BAM file | ||
| pattern: "*.bai" | ||
| ontologies: [] | ||
| - - meta2: | ||
| type: map | ||
| description: | | ||
| Groovy Map containing reference information | ||
| - fasta: | ||
| type: file | ||
| description: The reference fasta | ||
| pattern: "*.{fa,fna,fasta}" | ||
| ontologies: [] | ||
| - - meta3: | ||
| type: map | ||
| description: | | ||
| Groovy Map containing reference information | ||
| - fasta_fai: | ||
| type: file | ||
| description: The index of the reference fasta | ||
| pattern: "*.fai" | ||
| ontologies: [] | ||
| - - meta4: | ||
| type: map | ||
| description: | | ||
| Groovy Map containing reference information | ||
| - bwa_index: | ||
| type: directory | ||
| description: The BWA index created from the reference fasta; it will be | ||
| generated by GRIDSS in the setupreference step | ||
| output: | ||
| preprocess_dir: | ||
| - - meta: | ||
| type: map | ||
| description: | | ||
| Groovy Map containing sample information | ||
| e.g. [ id:'test' ] | ||
| - "*.gridss.working": | ||
| type: directory | ||
| description: The working directory produced by the GRIDSS preprocess | ||
| step containing Picard metrics (insert size, MAPQ, CIGAR, IDSV, tag, | ||
| coverage) and SV-relevant reads used by downstream GRIDSS steps | ||
| pattern: "*.gridss.working" | ||
| versions_gridss: | ||
| - - ${task.process}: | ||
| type: string | ||
| description: The process | ||
| - gridss: | ||
| type: string | ||
| description: The tool name | ||
| - CallVariants --version 2>&1 | sed 's/-gridss\$//': | ||
| type: eval | ||
| description: The expression to obtain the version of the tool | ||
| topics: | ||
| versions: | ||
| - - ${task.process}: | ||
| type: string | ||
| description: The process | ||
| - gridss: | ||
| type: string | ||
| description: The tool name | ||
| - CallVariants --version 2>&1 | sed 's/-gridss\$//': | ||
| type: eval | ||
| description: The expression to obtain the version of the tool | ||
| authors: | ||
| - "@imsarath" | ||
| maintainers: | ||
| - "@imsarath" | ||
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,107 @@ | ||||||||||||||||||||
| nextflow_process { | ||||||||||||||||||||
|
|
||||||||||||||||||||
| name "Test Process GRIDSS_PREPROCESS" | ||||||||||||||||||||
| script "../main.nf" | ||||||||||||||||||||
| config "./nextflow.config" | ||||||||||||||||||||
| process "GRIDSS_PREPROCESS" | ||||||||||||||||||||
|
|
||||||||||||||||||||
| tag "modules" | ||||||||||||||||||||
| tag "modules_nfcore" | ||||||||||||||||||||
| tag "gridss" | ||||||||||||||||||||
| tag "gridss/preprocess" | ||||||||||||||||||||
| tag "bwa/index" | ||||||||||||||||||||
|
|
||||||||||||||||||||
| setup { | ||||||||||||||||||||
|
|
||||||||||||||||||||
| // Build a BWA index of genome.fasta; the "human - bam - bwa" test passes BWA_INDEX.out.index as input[3]. | ||||||||||||||||||||
| run("BWA_INDEX") { | ||||||||||||||||||||
| script "../../../bwa/index/main.nf" | ||||||||||||||||||||
| process { | ||||||||||||||||||||
| """ | ||||||||||||||||||||
| input[0] = [ [id:'fasta'], | ||||||||||||||||||||
| file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) | ||||||||||||||||||||
| ] | ||||||||||||||||||||
| """ | ||||||||||||||||||||
| } | ||||||||||||||||||||
| } | ||||||||||||||||||||
| } | ||||||||||||||||||||
|
|
||||||||||||||||||||
| test("human - bam - bwa") { | ||||||||||||||||||||
|
|
||||||||||||||||||||
| when { | ||||||||||||||||||||
| process { | ||||||||||||||||||||
| """ | ||||||||||||||||||||
| input[0] = [ | ||||||||||||||||||||
| [ id:'test' ], | ||||||||||||||||||||
| file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), | ||||||||||||||||||||
| file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) | ||||||||||||||||||||
| ] | ||||||||||||||||||||
| input[1] = [ [id:'fasta'], | ||||||||||||||||||||
| file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) | ||||||||||||||||||||
| ] | ||||||||||||||||||||
| input[2] = [ [id:'fasta_fai'], | ||||||||||||||||||||
| file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) | ||||||||||||||||||||
| ] | ||||||||||||||||||||
| input[3] = BWA_INDEX.out.index | ||||||||||||||||||||
| """ | ||||||||||||||||||||
| } | ||||||||||||||||||||
| } | ||||||||||||||||||||
|
|
||||||||||||||||||||
| then { | ||||||||||||||||||||
| def workdir = process.out.preprocess_dir[0][1] | ||||||||||||||||||||
| def prefix = "test.paired_end.sorted.bam" | ||||||||||||||||||||
| // Picard `# Started on:` and R's PDF `/CreationDate` add per-run timestamps, | ||||||||||||||||||||
| // and `sv.bam`/`sv.bam.csi` BGZF blocks differ across conda/docker — snapshot only stable parts. | ||||||||||||||||||||
| // Presence of `sv.bam` and `sv.bam.csi` is covered by the directory listing snapshot. | ||||||||||||||||||||
| def stripPicardHeaderMd5 = { f -> "${file(f).name}:md5,${listToMD5(path(f).readLines().findAll { !it.startsWith("#") })}" } | ||||||||||||||||||||
|
|
||||||||||||||||||||
| assertAll( | ||||||||||||||||||||
| { assert process.success }, | ||||||||||||||||||||
| { assert snapshot( | ||||||||||||||||||||
| process.out.preprocess_dir.collect { meta, dir -> [meta, file(dir).list().findAll { it.startsWith(prefix) }.sort()] }, | ||||||||||||||||||||
| path("${workdir}/${prefix}.computesamtags.changes.tsv"), | ||||||||||||||||||||
| path("${workdir}/${prefix}.coverage.blacklist.bed"), | ||||||||||||||||||||
| stripPicardHeaderMd5("${workdir}/${prefix}.cigar_metrics"), | ||||||||||||||||||||
| stripPicardHeaderMd5("${workdir}/${prefix}.idsv_metrics"), | ||||||||||||||||||||
| stripPicardHeaderMd5("${workdir}/${prefix}.insert_size_metrics"), | ||||||||||||||||||||
| stripPicardHeaderMd5("${workdir}/${prefix}.mapq_metrics"), | ||||||||||||||||||||
| stripPicardHeaderMd5("${workdir}/${prefix}.tag_metrics"), | ||||||||||||||||||||
|
Comment on lines
+60
to
+67
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think lines 60-67 are actually testing the tool, not the module. The behaviour of the tool should be tested in the tool itself. For the Nextflow module, I would think checking the process outputs is sufficient.
Suggested change
Looking at the tests for cadd for example, that seems to be the case. 🤔 |
||||||||||||||||||||
| process.out.findAll { key, val -> key.startsWith("versions") } | ||||||||||||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I believe that with
Suggested change
Provided that
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Or even just
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||||||||||||||||||||
| ).match() } | ||||||||||||||||||||
| ) | ||||||||||||||||||||
| } | ||||||||||||||||||||
| } | ||||||||||||||||||||
|
|
||||||||||||||||||||
| test("human - bam - stub") { | ||||||||||||||||||||
|
|
||||||||||||||||||||
| // Stub run: input[3] (the BWA index) is left empty, and only the stub output directory and the versions channel are snapshotted. | ||||||||||||||||||||
| options "-stub" | ||||||||||||||||||||
|
|
||||||||||||||||||||
| when { | ||||||||||||||||||||
| process { | ||||||||||||||||||||
| """ | ||||||||||||||||||||
| input[0] = [ | ||||||||||||||||||||
| [ id:'test' ], | ||||||||||||||||||||
| file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), | ||||||||||||||||||||
| file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) | ||||||||||||||||||||
| ] | ||||||||||||||||||||
| input[1] = [ [id:'fasta'], | ||||||||||||||||||||
| file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) | ||||||||||||||||||||
| ] | ||||||||||||||||||||
| input[2] = [ [id:'fasta_fai'], | ||||||||||||||||||||
| file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) | ||||||||||||||||||||
| ] | ||||||||||||||||||||
| input[3] = [ [], [] ] | ||||||||||||||||||||
| """ | ||||||||||||||||||||
| } | ||||||||||||||||||||
| } | ||||||||||||||||||||
|
|
||||||||||||||||||||
| then { | ||||||||||||||||||||
| assertAll( | ||||||||||||||||||||
| { assert process.success }, | ||||||||||||||||||||
| { assert snapshot( | ||||||||||||||||||||
| process.out.preprocess_dir, | ||||||||||||||||||||
| process.out.findAll { key, val -> key.startsWith("versions") } | ||||||||||||||||||||
| ).match() } | ||||||||||||||||||||
| ) | ||||||||||||||||||||
| } | ||||||||||||||||||||
| } | ||||||||||||||||||||
| } | ||||||||||||||||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,83 @@ | ||
| { | ||
| "human - bam - stub": { | ||
| "content": [ | ||
| [ | ||
| [ | ||
| { | ||
| "id": "test" | ||
| }, | ||
| [ | ||
| "test.gridss.targeted.bam.cigar_metrics:md5,d41d8cd98f00b204e9800998ecf8427e", | ||
| "test.gridss.targeted.bam.computesamtags.changes.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", | ||
| "test.gridss.targeted.bam.coverage.blacklist.bed:md5,d41d8cd98f00b204e9800998ecf8427e", | ||
| "test.gridss.targeted.bam.idsv_metrics:md5,d41d8cd98f00b204e9800998ecf8427e", | ||
| "test.gridss.targeted.bam.insert_size_histogram.pdf:md5,d41d8cd98f00b204e9800998ecf8427e", | ||
| "test.gridss.targeted.bam.insert_size_metrics:md5,d41d8cd98f00b204e9800998ecf8427e", | ||
| "test.gridss.targeted.bam.mapq_metrics:md5,d41d8cd98f00b204e9800998ecf8427e", | ||
| "test.gridss.targeted.bam.sv.bam:md5,d41d8cd98f00b204e9800998ecf8427e", | ||
| "test.gridss.targeted.bam.sv.bam.csi:md5,d41d8cd98f00b204e9800998ecf8427e", | ||
| "test.gridss.targeted.bam.tag_metrics:md5,d41d8cd98f00b204e9800998ecf8427e" | ||
| ] | ||
| ] | ||
| ], | ||
| { | ||
| "versions_gridss": [ | ||
| [ | ||
| "GRIDSS_PREPROCESS", | ||
| "gridss", | ||
| "2.13.2" | ||
| ] | ||
| ] | ||
| } | ||
| ], | ||
| "timestamp": "2026-06-12T14:30:07.433688807", | ||
| "meta": { | ||
| "nf-test": "0.9.4", | ||
| "nextflow": "26.04.3" | ||
| } | ||
| }, | ||
| "human - bam - bwa": { | ||
| "content": [ | ||
| [ | ||
| [ | ||
| { | ||
| "id": "test" | ||
| }, | ||
| [ | ||
| "test.paired_end.sorted.bam.cigar_metrics", | ||
| "test.paired_end.sorted.bam.computesamtags.changes.tsv", | ||
| "test.paired_end.sorted.bam.coverage.blacklist.bed", | ||
| "test.paired_end.sorted.bam.idsv_metrics", | ||
| "test.paired_end.sorted.bam.insert_size_histogram.pdf", | ||
| "test.paired_end.sorted.bam.insert_size_metrics", | ||
| "test.paired_end.sorted.bam.mapq_metrics", | ||
| "test.paired_end.sorted.bam.sv.bam", | ||
| "test.paired_end.sorted.bam.sv.bam.csi", | ||
| "test.paired_end.sorted.bam.tag_metrics" | ||
| ] | ||
| ] | ||
| ], | ||
| "test.paired_end.sorted.bam.computesamtags.changes.tsv:md5,ce0a87ccee35f990cd878e12a8a84bae", | ||
| "test.paired_end.sorted.bam.coverage.blacklist.bed:md5,63a1da1606bf23357ad6b1b166c21651", | ||
| "test.paired_end.sorted.bam.cigar_metrics:md5,29b9a700b9da9f2cb23e199e8c15af31", | ||
| "test.paired_end.sorted.bam.idsv_metrics:md5,eeee010100dcea8b9f9eeeac44f8d142", | ||
| "test.paired_end.sorted.bam.insert_size_metrics:md5,4a7d860f1073a82c093373bdbdbd6bf3", | ||
| "test.paired_end.sorted.bam.mapq_metrics:md5,dff79a95a8f9b40de520ef438c9fab59", | ||
| "test.paired_end.sorted.bam.tag_metrics:md5,486bf2bab6f506dd20f98e7fa05ae39c", | ||
| { | ||
| "versions_gridss": [ | ||
| [ | ||
| "GRIDSS_PREPROCESS", | ||
| "gridss", | ||
| "2.13.2" | ||
| ] | ||
| ] | ||
| } | ||
| ], | ||
| "timestamp": "2026-06-12T14:29:56.150511142", | ||
| "meta": { | ||
| "nf-test": "0.9.4", | ||
| "nextflow": "26.04.3" | ||
| } | ||
| } | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,6 @@ | ||
| process { | ||
| withName: BWA_INDEX { | ||
| // GRIDSS requires the BWA index files to be prefixed with the full reference file name, i.e. "genome.fasta" | ||
| ext.prefix = { "genome.fasta" } | ||
| } | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.