diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index fe002881..545219c9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -48,7 +48,8 @@ jobs: - "bam_variant_calling_qdnaseq" - "bam_variant_calling_smoove" - "bam_variant_calling_wisecondorx" - - "vcf_annotate_vep_annotsv" + # - "vcf_annotate_vep_annotsv" + - "vcf_annotate" - "vcf_annotate_vcfanno" - "vcf_concat_bcftools" - "vcf_merge_callers_jasmine" diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml index 95b6b6af..d43797d9 100644 --- a/.github/workflows/linting_comment.yml +++ b/.github/workflows/linting_comment.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Download lint results - uses: dawidd6/action-download-artifact@20319c5641d495c8a52e688b7dc5fada6c3a9fbc # v8 + uses: dawidd6/action-download-artifact@ac66b43f0e6a346234dd65d4d0c8fbb31cb316e5 # v11 with: workflow: linting.yml workflow_conclusion: completed @@ -21,7 +21,7 @@ jobs: run: echo "pr_number=$(cat linting-logs/PR_number.txt)" >> $GITHUB_OUTPUT - name: Post PR comment - uses: marocchino/sticky-pull-request-comment@331f8f5b4215f0445d3c07b4967662a32a2d3e31 # v2 + uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2 with: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} number: ${{ steps.pr_number.outputs.pr_number }} diff --git a/.github/workflows/template_version_comment.yml b/.github/workflows/template_version_comment.yml deleted file mode 100644 index 537529bc..00000000 --- a/.github/workflows/template_version_comment.yml +++ /dev/null @@ -1,46 +0,0 @@ -name: nf-core template version comment -# This workflow is triggered on PRs to check if the pipeline template version matches the latest nf-core version. -# It posts a comment to the PR, even if it comes from a fork. 
- -on: pull_request_target - -jobs: - template_version: - runs-on: ubuntu-latest - steps: - - name: Check out pipeline code - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 - with: - ref: ${{ github.event.pull_request.head.sha }} - - - name: Read template version from .nf-core.yml - uses: nichmor/minimal-read-yaml@v0.0.2 - id: read_yml - with: - config: ${{ github.workspace }}/.nf-core.yml - - - name: Install nf-core - run: | - python -m pip install --upgrade pip - pip install nf-core==${{ steps.read_yml.outputs['nf_core_version'] }} - - - name: Check nf-core outdated - id: nf_core_outdated - run: echo "OUTPUT=$(pip list --outdated | grep nf-core)" >> ${GITHUB_ENV} - - - name: Post nf-core template version comment - uses: mshick/add-pr-comment@b8f338c590a895d50bcbfa6c5859251edc8952fc # v2 - if: | - contains(env.OUTPUT, 'nf-core') - with: - repo-token: ${{ secrets.NF_CORE_BOT_AUTH_TOKEN }} - allow-repeats: false - message: | - > [!WARNING] - > Newer version of the nf-core template is available. - > - > Your pipeline is using an old version of the nf-core template: ${{ steps.read_yml.outputs['nf_core_version'] }}. - > Please update your pipeline to the latest version. - > - > For more documentation on how to update your pipeline, please see the [nf-core documentation](https://github.com/nf-core/tools?tab=readme-ov-file#sync-a-pipeline-with-the-template) and [Synchronisation documentation](https://nf-co.re/docs/contributing/sync). 
- # diff --git a/.nf-core.yml b/.nf-core.yml index 390d524a..f875c558 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -10,6 +10,10 @@ lint: - assets/nf-core-structural_logo_light.png - docs/images/nf-core-structural_logo_light.png - docs/images/nf-core-structural_logo_dark.png + - .github/workflows/nf-test.yml + - .github/actions/get-shards/action.yml + - .github/actions/nf-test/action.yml + - tests/default.nf.test files_unchanged: - LICENSE - .github/PULL_REQUEST_TEMPLATE.md @@ -24,7 +28,7 @@ lint: - manifest.name - manifest.homePage multiqc_config: false -nf_core_version: 3.2.0 +nf_core_version: 3.3.2 repository_type: pipeline template: author: nvnieuwk diff --git a/.prettierrc.yml b/.prettierrc.yml index c81f9a76..07dbd8bb 100644 --- a/.prettierrc.yml +++ b/.prettierrc.yml @@ -1 +1,6 @@ printWidth: 120 +tabWidth: 4 +overrides: + - files: "*.{md,yml,yaml,html,css,scss,js,cff}" + options: + tabWidth: 2 diff --git a/CHANGELOG.md b/CHANGELOG.md index 5ec2fff1..4ec38f6a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 2. Added the `--bedpe` parameter. This makes the pipeline output BEDPE files alongside the VCF files. 3. Added parallelization on SV type to the delly flow 4. Added a `--gtf` parameter for annotation of gene and transcript overlap using `gatk SVAnnotate`. +5. Added `StrVCTVRE` as a new annotation tool ### `Changes` @@ -26,6 +27,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 10. Removed the `--delly_sv_types` parameter. 11. Moved all `wisecondorx` and `qdnaseq` outputs to a separate directory in each sample output. 12. Bumped all annotation modules to the latest versions +13. Reworked the annotation structure to a per-tool structure. Specify the annotation tools you want to run with `--annotate_tools`. 
This parameter takes a comma-separated list of tool names (options: `vep`, `vcfanno`, `svannotate`, `strvctvre` or `all` (=> all tools)) ### `Fixed` diff --git a/assets/vcfanno/annotsv.toml b/assets/vcfanno_unused/annotsv.toml similarity index 100% rename from assets/vcfanno/annotsv.toml rename to assets/vcfanno_unused/annotsv.toml diff --git a/conf/modules.config b/conf/modules.config index 378a340d..51aa4442 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -206,31 +206,31 @@ process { ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ - withName: "^.*VCF_ANNOTATE_VEP_ANNOTSV:BCFTOOLS_FILTER\$" { - ext.prefix = {"${meta.id}.filter"} - ext.args = "-e 'GT=\"ref\"' --output-type z" - } + // withName: "^.*VCF_ANNOTATE_VEP_ANNOTSV:BCFTOOLS_FILTER\$" { + // ext.prefix = {"${meta.id}.filter"} + // ext.args = "-e 'GT=\"ref\"' --output-type z" + // } - withName: "^.*VCF_ANNOTATE_VEP_ANNOTSV:ANNOTSV_ANNOTSV\$" { - ext.args = {[ - "-SVminSize 20", - "-vcf 1", - meta.hpo ? "-hpo ${meta.hpo}" : "" - ].join(" ")} - ext.prefix = {"${meta.id}.annot"} - } + // withName: "^.*VCF_ANNOTATE_VEP_ANNOTSV:ANNOTSV_ANNOTSV\$" { + // ext.args = {[ + // "-SVminSize 20", + // "-vcf 1", + // meta.hpo ? 
"-hpo ${meta.hpo}" : "" + // ].join(" ")} + // ext.prefix = {"${meta.id}.annot"} + // } - withName: "^.*VCF_ANNOTATE_VEP_ANNOTSV:BCFTOOLS_CONCAT\$" { - ext.prefix = "annotsv_annotated_unsorted" - ext.args = "--output-type z --naive-force" - } + // withName: "^.*VCF_ANNOTATE_VEP_ANNOTSV:BCFTOOLS_CONCAT\$" { + // ext.prefix = "annotsv_annotated_unsorted" + // ext.args = "--output-type z --naive-force" + // } - withName: "^.*VCF_ANNOTATE_VEP_ANNOTSV:BCFTOOLS_SORT\$" { - ext.prefix = "annotsv_annotated" - ext.args = "--output-type z --write-index=tbi" - } + // withName: "^.*VCF_ANNOTATE_VEP_ANNOTSV:BCFTOOLS_SORT\$" { + // ext.prefix = "annotsv_annotated" + // ext.args = "--output-type z --write-index=tbi" + // } - withName: "^.*VCF_ANNOTATE_VEP_ANNOTSV:ENSEMBLVEP_VEP\$" { + withName: "^.*VCF_ANNOTATE:ENSEMBLVEP_VEP\$" { ext.prefix = {"${meta.id}.vep"} ext.args = {[ // specify we use VCF files @@ -259,10 +259,14 @@ process { ext.args = "-ends" } - withName: "^.*GATK4_SVANNOTATE\$" { + withName: "^.*VCF_ANNOTATE:GATK4_SVANNOTATE\$" { ext.prefix = {"${meta.id}.${meta.variant_type}.svannotate"} } + withName: "^.*VCF_ANNOTATE:TABIX_BGZIPTABIX\$" { + ext.prefix = {"${meta.id}.${meta.variant_type}.strvctvre"} + } + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SV AND CNV FILTERING diff --git a/conf/test.config b/conf/test.config index a9672f77..e9c43417 100644 --- a/conf/test.config +++ b/conf/test.config @@ -43,7 +43,7 @@ params { genomes_ignore = true genome = 'GRCh38' vep_cache = null - annotsv_annotations = null + // annotsv_annotations = null concat_output = true diff --git a/main.nf b/main.nf index 945f1d6f..aa55c83e 100644 --- a/main.nf +++ b/main.nf @@ -32,11 +32,13 @@ params.dict = getGenomeAttribute('dict') params.gtf = getGenomeAttribute('gtf') params.vep_cache = getGenomeAttribute('vep_cache') // params.bwa = getGenomeAttribute('bwa') -params.annotsv_annotations = getGenomeAttribute('annotsv_annotations') +// 
params.annotsv_annotations = getGenomeAttribute('annotsv_annotations') params.expansionhunter_catalog = getGenomeAttribute('expansionhunter_catalog') params.qdnaseq_male = getGenomeAttribute("qdnaseq_male_${params.qdnaseq_bin_size.toInteger() / 1000}kbp".toString()) params.qdnaseq_female = getGenomeAttribute("qdnaseq_female_${params.qdnaseq_bin_size.toInteger() / 1000}kbp".toString()) params.wisecondorx_reference = getGenomeAttribute('wisecondorx_reference') +params.strvctvre_phylop = getGenomeAttribute('strvctvre_phylop') +params.strvctvre_data = getGenomeAttribute('strvctvre_data') /* @@ -90,9 +92,9 @@ workflow { params.qdnaseq_male, params.wisecondorx_reference, params.vep_cache, - params.annotsv_annotations, - params.annotsv_candidate_genes, - params.annotsv_gene_transcripts, + // params.annotsv_annotations, + // params.annotsv_candidate_genes, + // params.annotsv_gene_transcripts, params.vcfanno_lua, params.vcfanno_resources, params.vcfanno_toml, @@ -101,6 +103,8 @@ workflow { "${projectDir}/assets/svync", "${projectDir}/assets/bedgovcf", "${projectDir}/assets/vcfanno", + params.strvctvre_phylop, + params.strvctvre_data, // booleans params.annotate, @@ -116,7 +120,8 @@ workflow { params.vep_assembly, params.vep_cache_version, params.filter, - params.outdir + params.outdir, + params.annotate_tools ? 
params.annotate_tools.tokenize(",") : [] ) // // SUBWORKFLOW: Run completion tasks diff --git a/modules.json b/modules.json index 6db33464..7631ad80 100644 --- a/modules.json +++ b/modules.json @@ -64,7 +64,8 @@ "expansionhunter": { "branch": "master", "git_sha": "b42fec6f7c6e5d0716685cabb825ef6bf6e386b5", - "installed_by": ["modules"] + "installed_by": ["modules"], + "patch": "modules/nf-core/expansionhunter/expansionhunter.diff" }, "gatk4/createsequencedictionary": { "branch": "master", @@ -138,10 +139,17 @@ "git_sha": "f2fdc5e81efe36beff213752ce1a8e71fceeacba", "installed_by": ["modules"] }, + "strvctvre/strvctvre": { + "branch": "master", + "git_sha": "e05456babfdf276f4771b140d04fb41eb2a2d13c", + "installed_by": ["modules"], + "patch": "modules/nf-core/strvctvre/strvctvre/strvctvre-strvctvre.diff" + }, "svtools/vcftobedpe": { "branch": "master", "git_sha": "5cf15e4c5ad6d06718b35e5ec1655c7c0a105880", - "installed_by": ["modules"] + "installed_by": ["modules"], + "patch": "modules/nf-core/svtools/vcftobedpe/svtools-vcftobedpe.diff" }, "svync": { "branch": "master", @@ -185,12 +193,12 @@ "nf-core": { "utils_nextflow_pipeline": { "branch": "master", - "git_sha": "d20fb2a9cc3e2835e9d067d1046a63252eb17352", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", "installed_by": ["subworkflows"] }, "utils_nfcore_pipeline": { "branch": "master", - "git_sha": "2fdce49d30c0254f76bc0f13c55c17455c1251ab", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", "installed_by": ["subworkflows"] }, "utils_nfschema_plugin": { diff --git a/modules/nf-core/expansionhunter/expansionhunter.diff b/modules/nf-core/expansionhunter/expansionhunter.diff new file mode 100644 index 00000000..c7f18c11 --- /dev/null +++ b/modules/nf-core/expansionhunter/expansionhunter.diff @@ -0,0 +1,44 @@ +Changes in component 'nf-core/expansionhunter' +Changes in 'expansionhunter/main.nf': +--- modules/nf-core/expansionhunter/main.nf ++++ modules/nf-core/expansionhunter/main.nf +@@ -15,6 +15,7 @@ + + 
output: + tuple val(meta), path("*.vcf.gz") , emit: vcf ++ tuple val(meta), path("*.vcf.gz.tbi") , emit: tbi + tuple val(meta), path("*.json.gz") , emit: json + tuple val(meta), path("*_realigned.bam") , emit: bam + path "versions.yml" , emit: versions +@@ -36,6 +37,7 @@ + --variant-catalog ${variant_catalog} + + bgzip --threads ${task.cpus} ${args2} ${prefix}.vcf ++ tabix ${prefix}.vcf.gz + bgzip --threads ${task.cpus} ${args2} ${prefix}.json + + cat <<-END_VERSIONS > versions.yml +@@ -49,6 +51,7 @@ + def prefix = task.ext.prefix ?: "${meta.id}" + """ + echo "" | gzip > ${prefix}.vcf.gz ++ touch ${prefix}.vcf.gz.tbi + echo "" | gzip > ${prefix}.json.gz + touch ${prefix}_realigned.bam + + +Changes in 'expansionhunter/environment.yml': +--- modules/nf-core/expansionhunter/environment.yml ++++ modules/nf-core/expansionhunter/environment.yml +@@ -1,5 +1,4 @@ + name: expansionhunter +- + channels: + - conda-forge + - bioconda + +'modules/nf-core/expansionhunter/meta.yml' is unchanged +'modules/nf-core/expansionhunter/tests/tags.yml' is unchanged +'modules/nf-core/expansionhunter/tests/main.nf.test' is unchanged +'modules/nf-core/expansionhunter/tests/main.nf.test.snap' is unchanged +************************************************************ diff --git a/modules/nf-core/strvctvre/strvctvre/environment.yml b/modules/nf-core/strvctvre/strvctvre/environment.yml new file mode 100644 index 00000000..283e2830 --- /dev/null +++ b/modules/nf-core/strvctvre/strvctvre/environment.yml @@ -0,0 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::liftover=1.3.2 + - bioconda::strvctvre=1.10 diff --git a/modules/nf-core/strvctvre/strvctvre/main.nf b/modules/nf-core/strvctvre/strvctvre/main.nf new file mode 100644 index 00000000..5af4797a --- /dev/null +++ b/modules/nf-core/strvctvre/strvctvre/main.nf @@ -0,0 +1,76 @@ +process 
STRVCTVRE_STRVCTVRE { + tag "$meta.id" + label 'process_low' + + // When updating the version here, don't forget to update the hardcoded version in script and stub sections + // Version in help message does not match the actual version of the tool + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/95/9584eeb6569a511be29d0a07bf80103d59d38715ddb971dddeca0bc72aec41d3/data': + 'community.wave.seqera.io/library/liftover_strvctvre:5fec172b808cc48e' }" + + input: + tuple val(meta), path(sv_file), path(sv_file_index), val(assembly) + tuple val(meta2), path(phylop) + tuple val(meta3), path(data_directory) + + output: + tuple val(meta), path("*.vcf"), emit: vcf, optional: true + tuple val(meta), path("*.bed"), emit: bed, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '1.10' + def format = '' + if (sv_file.name.endsWith('.vcf') || sv_file.name.endsWith('.vcf.gz')) { + format = 'vcf' + } else if (sv_file.name.endsWith('.bed')) { + format = 'bed' + } else { + error("Input structural variants file must be in VCF or BED format") + } + if (!['GRCh38', 'GRCh37'].contains(assembly)) { + error("Assembly must be either 'GRCh37' or 'GRCh38'") + } + """ + StrVCTVRE.py \\ + --input ${sv_file} \\ + --format ${format} \\ + --phyloP ${phylop} \\ + --assembly ${assembly} \\ + --liftover liftover_hg19_to_hg38_public.py \\ + --output ${prefix}.${format} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + strvctvre: '$VERSION' + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '1.10' + def format = '' + if (sv_file.name.endsWith('.vcf') || sv_file.name.endsWith('.vcf.gz')) { + format = 'vcf' + } else if (sv_file.name.endsWith('.bed')) { + format = 
'bed' + } else { + error("Input structural variants file must be in VCF or BED format") + } + if (!['GRCh38', 'GRCh37'].contains(assembly)) { + error("Assembly must be either 'GRCh37' or 'GRCh38'") + } + """ + touch ${prefix}.${format} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + strvctvre: '$VERSION' + END_VERSIONS + """ +} diff --git a/modules/nf-core/strvctvre/strvctvre/meta.yml b/modules/nf-core/strvctvre/strvctvre/meta.yml new file mode 100644 index 00000000..1a4718b4 --- /dev/null +++ b/modules/nf-core/strvctvre/strvctvre/meta.yml @@ -0,0 +1,81 @@ +name: "strvctvre_strvctvre" +description: a structural variant classifier for exonic deletions and + duplications +keywords: + - structural variants + - sv + - deletions + - duplications + - annotations +tools: + - "strvctvre": + description: "StrVCTVRE, a structural variant classifier for exonic deletions + and duplications" + homepage: "https://github.com/andrewSharo/StrVCTVRE/tree/master" + documentation: "https://github.com/andrewSharo/StrVCTVRE/tree/master" + tool_dev_url: "https://github.com/andrewSharo/StrVCTVRE/tree/master" + licence: ["MIT"] + identifier: biotools:strvctvre + +input: + - - meta: + type: map + description: Groovy Map containing sample information. e.g. `[ + id:'sample1' ]` + - sv_file: + type: file + description: Structural variants file in VCF or BED format + pattern: "*.{vcf,bed}" + - sv_file_index: + type: file + description: Index file for the structural variants file VCF file + pattern: "*.tbi" + - assembly: + type: string + description: Genome assembly version, has to be one of 'GRCh38' or 'GRCh37' + enum: ["GRCh38", "GRCh37"] + - - meta2: + type: map + description: Groovy Map containing sample information. e.g. `[ + id:'sample1' ]` + - phylop: + type: file + description: PhyloP bigWig file (fetched from https://hgdownload.cse.ucsc.edu/goldenpath/hg38/phyloP100way/) + pattern: "*.bw" + - - meta3: + type: map + description: Groovy Map containing sample information. 
e.g. `[ + id:'sample1' ]` + - data_directory: + type: directory + description: Directory containing the StrVCTVRE data files (fetched from https://github.com/andrewSharo/StrVCTVRE/tree/master/data) +output: + vcf: + - - meta: + type: map + description: Groovy Map containing sample information. e.g. `[ + id:'sample1' ]` + - "*.vcf": + type: file + description: Structural variants file in VCF format + pattern: "*.vcf" + bed: + - - meta: + type: map + description: Groovy Map containing sample information. e.g. `[ + id:'sample1' ]` + - "*.bed": + type: file + description: Structural variants file in BED format + pattern: "*.bed" + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: versions.yml + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@nvnieuwk" +maintainers: + - "@nvnieuwk" diff --git a/modules/nf-core/strvctvre/strvctvre/strvctvre-strvctvre.diff b/modules/nf-core/strvctvre/strvctvre/strvctvre-strvctvre.diff new file mode 100644 index 00000000..3875c6d1 --- /dev/null +++ b/modules/nf-core/strvctvre/strvctvre/strvctvre-strvctvre.diff @@ -0,0 +1,28 @@ +Changes in component 'nf-core/strvctvre/strvctvre' +'modules/nf-core/strvctvre/strvctvre/main.nf' is unchanged +'modules/nf-core/strvctvre/strvctvre/environment.yml' is unchanged +Changes in 'strvctvre/strvctvre/meta.yml': +--- modules/nf-core/strvctvre/strvctvre/meta.yml ++++ modules/nf-core/strvctvre/strvctvre/meta.yml +@@ -22,7 +22,7 @@ + type: map + description: Groovy Map containing sample information. e.g. `[ + id:'sample1' ]` +- - sv_file: ++ - sv_file: + type: file + description: Structural variants file in VCF or BED format + pattern: "*.{vcf,bed}" +@@ -33,7 +33,7 @@ + - assembly: + type: string + description: Genome assembly version, has to be one of 'GRCh38' or 'GRCh37' +- enum: [ 'GRCh38', 'GRCh37' ] ++ enum: ["GRCh38", "GRCh37"] + - - meta2: + type: map + description: Groovy Map containing sample information. e.g. 
`[ + +'modules/nf-core/strvctvre/strvctvre/tests/main.nf.test' is unchanged +'modules/nf-core/strvctvre/strvctvre/tests/main.nf.test.snap' is unchanged +************************************************************ diff --git a/modules/nf-core/strvctvre/strvctvre/tests/main.nf.test b/modules/nf-core/strvctvre/strvctvre/tests/main.nf.test new file mode 100644 index 00000000..eef746f6 --- /dev/null +++ b/modules/nf-core/strvctvre/strvctvre/tests/main.nf.test @@ -0,0 +1,88 @@ +nextflow_process { + + name "Test Process STRVCTVRE_STRVCTVRE" + script "../main.nf" + process "STRVCTVRE_STRVCTVRE" + + tag "modules" + tag "modules_nfcore" + tag "strvctvre" + tag "strvctvre/strvctvre" + + // Only stub tests because of the big dependencies + + test("homo_sapiens - vcf - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/sv_query.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/sv_query.vcf.gz.tbi', checkIfExists: true), + "GRCh38" + ] + input[1] = [ + [ id: 'ref' ], + [] + ] + input[2] = [ + [ id: 'data' ], + [] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out, + path(process.out.versions[0]).yaml + ).match() } + ) + } + + } + + test("homo_sapiens - bed - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.bed', checkIfExists: true), + [], + "GRCh38" + ] + input[1] = [ + [ id: 'ref' ], + [] + ] + input[2] = [ + [ id: 'data' ], + [] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out, + path(process.out.versions[0]).yaml + ).match() } + ) + } + + } + +} diff --git 
a/modules/nf-core/strvctvre/strvctvre/tests/main.nf.test.snap b/modules/nf-core/strvctvre/strvctvre/tests/main.nf.test.snap new file mode 100644 index 00000000..6629068d --- /dev/null +++ b/modules/nf-core/strvctvre/strvctvre/tests/main.nf.test.snap @@ -0,0 +1,94 @@ +{ + "homo_sapiens - bed - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bed:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,bddaa896254d01da76c2ddb9e0d160d2" + ], + "bed": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bed:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "vcf": [ + + ], + "versions": [ + "versions.yml:md5,bddaa896254d01da76c2ddb9e0d160d2" + ] + }, + { + "STRVCTVRE_STRVCTVRE": { + "strvctvre": "1.10" + } + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-09-17T16:42:34.382580251" + }, + "homo_sapiens - vcf - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.vcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,bddaa896254d01da76c2ddb9e0d160d2" + ], + "bed": [ + + ], + "vcf": [ + [ + { + "id": "test", + "single_end": false + }, + "test.vcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,bddaa896254d01da76c2ddb9e0d160d2" + ] + }, + { + "STRVCTVRE_STRVCTVRE": { + "strvctvre": "1.10" + } + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-09-17T16:42:26.80025963" + } +} \ No newline at end of file diff --git a/modules/nf-core/svtools/vcftobedpe/svtools-vcftobedpe.diff b/modules/nf-core/svtools/vcftobedpe/svtools-vcftobedpe.diff new file mode 100644 index 00000000..3ea78c96 --- /dev/null +++ b/modules/nf-core/svtools/vcftobedpe/svtools-vcftobedpe.diff @@ -0,0 +1,66 @@ +Changes in component 'nf-core/svtools/vcftobedpe' +Changes in 'svtools/vcftobedpe/main.nf': +--- 
modules/nf-core/svtools/vcftobedpe/main.nf ++++ modules/nf-core/svtools/vcftobedpe/main.nf +@@ -22,7 +22,7 @@ + def prefix = task.ext.prefix ?: "${meta.id}" + """ + svtools vcftobedpe \\ +- $args ++ $args \\ + --input $vcf \\ + --output ${prefix}.bedpe \\ + --tempdir ./tmp + +'modules/nf-core/svtools/vcftobedpe/environment.yml' is unchanged +Changes in 'svtools/vcftobedpe/meta.yml': +--- modules/nf-core/svtools/vcftobedpe/meta.yml ++++ modules/nf-core/svtools/vcftobedpe/meta.yml +@@ -12,7 +12,7 @@ + - "svtools": + description: "Tools for processing and analyzing structural variants" + tool_dev_url: "https://github.com/hall-lab/svtools" +- licence: ['MIT License'] ++ licence: ["MIT License"] + + input: + - - meta: +@@ -27,21 +27,21 @@ + + output: + - bedpe: +- - meta: +- type: map +- description: | +- Groovy Map containing sample information +- e.g. `[ id:'sample1', single_end:false ]` +- - "*.bedpe": +- type: file +- description: The converted BEDPE file +- pattern: "*.bedpe" ++ - meta: ++ type: map ++ description: | ++ Groovy Map containing sample information ++ e.g. 
`[ id:'sample1', single_end:false ]` ++ - "*.bedpe": ++ type: file ++ description: The converted BEDPE file ++ pattern: "*.bedpe" + + - versions: +- - "versions.yml": +- type: file +- description: File containing software versions +- pattern: "versions.yml" ++ - "versions.yml": ++ type: file ++ description: File containing software versions ++ pattern: "versions.yml" + + authors: + - "@nvnieuwk" + +'modules/nf-core/svtools/vcftobedpe/tests/main.nf.test' is unchanged +'modules/nf-core/svtools/vcftobedpe/tests/main.nf.test.snap' is unchanged +************************************************************ diff --git a/nextflow.config b/nextflow.config index 52c6bef7..87a9e62a 100644 --- a/nextflow.config +++ b/nextflow.config @@ -19,6 +19,7 @@ params { sv_callers_support = 1 cnv_callers_support = 1 annotate = false + annotate_tools = "all" concat_output = false filter = null bedpe = false @@ -35,8 +36,8 @@ params { qdnaseq_cnv_ratio = 0.5 // AnnotSV options - annotsv_candidate_genes = null - annotsv_gene_transcripts = null + // annotsv_candidate_genes = null + // annotsv_gene_transcripts = null // VEP options vep_assembly = "GRCh38" @@ -77,7 +78,6 @@ params { help_full = false show_hidden = false version = false - pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/' trace_report_suffix = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss')// Config options config_profile_name = null config_profile_description = null @@ -206,8 +206,7 @@ profiles { } // Load nf-core custom profiles from different Institutions -includeConfig !System.getenv('NXF_OFFLINE') && params.custom_config_base ? "${params.custom_config_base}/nfcore_custom.config" : "/dev/null" - +includeConfig params.custom_config_base && (!System.getenv('NXF_OFFLINE') || !params.custom_config_base.startsWith('http')) ? 
"${params.custom_config_base}/nfcore_custom.config" : "/dev/null" // Set default registry for Apptainer, Docker, Podman, Charliecloud and Singularity independent of -profile // Will not be used unless Apptainer / Docker / Podman / Charliecloud / Singularity are enabled // Set to your registry if you have a mirror of containers diff --git a/nextflow_schema.json b/nextflow_schema.json index deb02016..b075ad3d 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -121,6 +121,20 @@ "format": "file-path", "mimetype": "application/octet-stream" }, + "strvctvre_phylop": { + "type": "string", + "format": "file-path", + "pattern": "^\\S+\\.bw$", + "exists": true, + "description": "Path to the StrVCTVRE phylo bigwig file" + }, + "strvctvre_data": { + "type": "string", + "format": "directory-path", + "pattern": "^\\S+$", + "exists": true, + "description": "Path to the StrVCTVRE data directory" + }, "blacklist": { "type": "string", "pattern": "^\\S+\\.bed$", @@ -301,12 +315,6 @@ "type": "boolean", "description": "Output monochrome logs" }, - "pipelines_testdata_base_path": { - "type": "string", - "hidden": true, - "description": "The base path to of the pipeline test data", - "default": "https://raw.githubusercontent.com/nf-core/test-datasets/" - }, "trace_report_suffix": { "type": "string", "hidden": true @@ -323,7 +331,8 @@ "type": "string", "default": "manta,smoove,delly,expansionhunter,wisecondorx", "description": "A comma-seperated list of callers to use. 
Can be one or more these: smoove/delly/manta/expansionhunter/qdnaseq/wisecondorx.", - "help_text": "Following keywords can also be used as shortcuts to certain callers: \n- all: Use all callers\n- sv: Use all SV callers\n- cnv: Use all CNV callers\n- rre: Use all RRE callers" + "help_text": "Following keywords can also be used as shortcuts to certain callers: \n- all: Use all callers\n- sv: Use all SV callers\n- cnv: Use all CNV callers\n- rre: Use all RRE callers", + "pattern": "^((manta|delly|smoove|wisecondorx|qdnaseq|expansionhunter|all|sv|cnv|rre)?,?)*(?\n\n\n1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/))2. Present QC for raw reads ([`MultiQC`](http://multiqc.info/))\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\n\n\nNow, you can run the pipeline using:\n\n\n\n```bash\nnextflow run nf-cmgg/structural \\\n -profile \\\n --input samplesheet.csv \\\n --outdir \n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\n## Credits\n\nnf-cmgg/structural was originally written by nvnieuwk.\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\n## Citations\n\n\n\n\n\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nThis pipeline uses code and infrastructure developed and maintained by the [nf-core](https://nf-co.re) community, reused here under the [MIT license](https://github.com/nf-core/tools/blob/main/LICENSE).\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. 
doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", + "description": "# nf-cmgg/structural\n\n[![GitHub Actions CI Status](https://github.com/nf-cmgg/structural/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-cmgg/structural/actions/workflows/ci.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-cmgg/structural/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-cmgg/structural/actions/workflows/linting.yml)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A525.04.0-23aa62.svg)](https://www.nextflow.io/)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-cmgg/structural)\n\n## Introduction\n\n**nf-cmgg/structural** is a bioinformatics best-practice analysis pipeline for calling structural variants (SVs), copy number variants (CNVs) and repeat region expansions (RREs) from short DNA reads. 
The pipeline handles the calling of the variants and postprocessing (filtering, annotating...)\n\nPlease have a look at the [documentation](https://nf-cmgg.github.io/structural/latest/) on how to run the pipeline\n", "hasPart": [ { "@id": "main.nf" diff --git a/subworkflows/local/bam_variant_calling_gridss/main.nf b/subworkflows/local/bam_variant_calling_gridss/main.nf.disabled similarity index 100% rename from subworkflows/local/bam_variant_calling_gridss/main.nf rename to subworkflows/local/bam_variant_calling_gridss/main.nf.disabled diff --git a/subworkflows/local/vcf_annotate/main.nf b/subworkflows/local/vcf_annotate/main.nf new file mode 100644 index 00000000..4b4aeba3 --- /dev/null +++ b/subworkflows/local/vcf_annotate/main.nf @@ -0,0 +1,100 @@ +include { VCF_ANNOTATE_VCFANNO } from '../../../subworkflows/local/vcf_annotate_vcfanno/main' + +include { ENSEMBLVEP_VEP } from '../../../modules/nf-core/ensemblvep/vep/main' +include { GATK4_SVANNOTATE } from '../../../modules/nf-core/gatk4/svannotate/main' +include { STRVCTVRE_STRVCTVRE } from '../../../modules/nf-core/strvctvre/strvctvre/main' +include { TABIX_BGZIPTABIX } from '../../../modules/nf-core/tabix/bgziptabix/main' + +workflow VCF_ANNOTATE { // Chains VEP -> vcfanno -> SVAnnotate -> StrVCTVRE; a step only runs when it (or "all") is listed in 'tools', otherwise its input channel is passed on unchanged + take: + vcfs // channel: [mandatory] [ val(meta), path(vcf), path(tbi) ] VCFs containing the called structural variants + fasta // channel: [mandatory] [ val(meta), path(fasta) ] => The fasta reference file + fai // channel: [optional] [ val(meta), path(fai) ] => The fasta index file + dict // channel: [optional] [ val(meta), path(dict) ] => The fasta dict file + gtf // channel: [optional] [ val(meta), path(gtf) ] => The preprocessed GTF file for SVAnnotate + vep_cache // channel: [optional] [ path(cache) ] => The path to the local VEP cache + vep_extra_files // channel: [optional] [ path(file1, file2, file3...) 
] => The VEP extra files + vcfanno_lua // channel: [optional] [ path(lua) ] => The lua script to influence VCFanno + vcfanno_toml // channel: [optional] [ path(toml) ] => A vcfanno TOML config + strvctvre_phylop // channel: [optional] [ val(meta), path(phylop) ] => The phylop bigwig file for StrVCTVRE + strvctvre_data // channel: [optional] [ val(meta), path(data_dir) ] => The data directory for StrVCTVRE + genome // string: [mandatory] => The genome used by the variant callers + species // string: [mandatory] => The species used by VEP + vep_cache_version // integer: [mandatory] => The VEP cache version to use + vcfanno_resources // list: [optional] [ path(file1, file2, file3...) ] => The extra VCFanno files + vcfanno_default_tomls // list: [mandatory] => A list of default vcfanno configs to be concatenated with the input TOML + tools // list: [optional] => The tools used for annotation (default: all tools) + + main: + def ch_versions = Channel.empty() + def ch_reports = Channel.empty() + + def ch_vep = vcfs + if( tools.contains("vep") || tools.contains("all") ) { + ENSEMBLVEP_VEP( + vcfs.map { meta, vcf, _tbi -> [ meta, vcf, []]}, + genome, + species, + vep_cache_version, + vep_cache, + fasta, + vep_extra_files + ) + ch_versions = ch_versions.mix(ENSEMBLVEP_VEP.out.versions.first()) + ch_reports = ch_reports.mix(ENSEMBLVEP_VEP.out.report) + ch_vep = ENSEMBLVEP_VEP.out.vcf.join(ENSEMBLVEP_VEP.out.tbi, failOnDuplicate:true, failOnMismatch:true) + } + + def ch_vcfanno = ch_vep + if( tools.contains("vcfanno") || tools.contains("all") ) { + VCF_ANNOTATE_VCFANNO( + ch_vep.map { meta, vcf, tbi -> [ meta, vcf, tbi, [] ] }, // the subworkflow takes [ meta, vcf, tbi, specific_resources ]; no sample-specific resources are used here + vcfanno_lua, + vcfanno_resources, + vcfanno_toml, + vcfanno_default_tomls + ) + ch_versions = ch_versions.mix(VCF_ANNOTATE_VCFANNO.out.versions) + ch_vcfanno = VCF_ANNOTATE_VCFANNO.out.vcfs.map { entry -> entry.take(3) } // back to [ meta, vcf, tbi ]: when no TOML is built the subworkflow returns its 4-element input unchanged + } + + def ch_svannotate = ch_vcfanno + if( tools.contains("svannotate") || tools.contains("all") ) { + def ch_svannotate_input = ch_vcfanno + .map { meta, vcf, tbi -> + [ meta, vcf, tbi, [], [] ] // TODO add 
BED files + } + + GATK4_SVANNOTATE( + ch_svannotate_input, + fasta, + fai, + dict, + gtf + ) + ch_versions = ch_versions.mix(GATK4_SVANNOTATE.out.versions.first()) + ch_svannotate = GATK4_SVANNOTATE.out.vcf.join(GATK4_SVANNOTATE.out.tbi, failOnMismatch:true, failOnDuplicate:true) + } + + def ch_strvctvre = ch_svannotate + if( tools.contains("strvctvre") || tools.contains("all") ) { + STRVCTVRE_STRVCTVRE( + ch_svannotate.map { meta, vcf, tbi -> [ meta, vcf, tbi, genome ] }, + strvctvre_phylop, + strvctvre_data + ) + ch_versions = ch_versions.mix(STRVCTVRE_STRVCTVRE.out.versions.first()) + + TABIX_BGZIPTABIX( + STRVCTVRE_STRVCTVRE.out.vcf + ) + ch_versions = ch_versions.mix(TABIX_BGZIPTABIX.out.versions.first()) + + ch_strvctvre = TABIX_BGZIPTABIX.out.gz_tbi + } + + emit: + vcfs = ch_strvctvre // channel: [ val(meta), path(vcf), path(tbi) ] output of the last annotation step that ran + versions = ch_versions // channel: versions files of the tools that ran + reports = ch_reports // channel: VEP reports (stays empty when VEP is not run) +} diff --git a/subworkflows/local/vcf_annotate_vcfanno/main.nf b/subworkflows/local/vcf_annotate_vcfanno/main.nf index 76fd976c..6eef994b 100644 --- a/subworkflows/local/vcf_annotate_vcfanno/main.nf +++ b/subworkflows/local/vcf_annotate_vcfanno/main.nf @@ -7,48 +7,39 @@ include { BCFTOOLS_FILTER } from '../../../modules/nf-core/bcftools/filter/mai workflow VCF_ANNOTATE_VCFANNO { take: - ch_vcfs // channel: [mandatory] [ val(meta), path(vcf), path(tbi) ] VCFs containing the called structural variants - ch_sample_specific_resources // channel: [optional] [ val(meta), path(vcf), path(tbi) ] Files containing resources that are sample-specific + ch_vcfs // channel: [mandatory] [ val(meta), path(vcf), path(tbi), path(specific_resources) ] VCFs containing the called structural variants ch_vcfanno_lua // channel: [optional] [ path(lua) ] => The lua script to influence VCFanno val_vcfanno_resources // list: [optional] [ path(file1, file2, file3...) 
] => The extra VCFanno files vcfanno_toml // file: [optional] => A vcfanno TOML config default_vcfanno_tomls // list: [mandatory] => A list of default vcfanno configs to be concatenated with the input TOML - annotate // boolean: [mandatory] => Whether or not to run the full annotation or only the specified annotations main: def ch_versions = Channel.empty() + def ch_annotated_vcfs = Channel.empty() - def ch_vcfanno_toml = Channel.empty() - def ch_vcfanno_input = Channel.empty() - if(annotate) { - ch_vcfanno_toml = Channel.fromList(create_vcfanno_toml(val_vcfanno_resources, vcfanno_toml, default_vcfanno_tomls)) + def val_toml = create_vcfanno_toml(val_vcfanno_resources, vcfanno_toml, default_vcfanno_tomls) + if( val_toml ) { + def ch_vcfanno_toml = Channel.fromList(val_toml) .collectFile(name:"vcfanno.toml", newLine:true) .collect() - def ch_collected_specific_resources = ch_sample_specific_resources.map { entry -> - [ entry[0], entry[1..-1].findAll { res_file -> res_file != [] } ] - } - ch_vcfanno_input = ch_vcfs.join(ch_collected_specific_resources, failOnMismatch:true, failOnDuplicate:true) - } else { - ch_vcfanno_toml = Channel.fromPath(vcfanno_toml).collect() - ch_vcfanno_input = ch_vcfs.map { meta, vcf, tbi -> - [ meta, vcf, tbi, [] ] - } - } - VCFANNO( - ch_vcfanno_input, - ch_vcfanno_toml, - ch_vcfanno_lua, - val_vcfanno_resources ? Channel.fromList(val_vcfanno_resources).collect() : [] - ) - ch_versions = ch_versions.mix(VCFANNO.out.versions) + VCFANNO( + ch_vcfs, + ch_vcfanno_toml, + ch_vcfanno_lua, + val_vcfanno_resources ? 
Channel.fromList(val_vcfanno_resources).collect() : [] + ) + ch_versions = ch_versions.mix(VCFANNO.out.versions.first()) - def ch_annotated_vcfs = VCFANNO.out.vcf.join(VCFANNO.out.tbi, failOnDuplicate:true, failOnMismatch:true) + ch_annotated_vcfs = VCFANNO.out.vcf.join(VCFANNO.out.tbi, failOnDuplicate:true, failOnMismatch:true) + } else { + // If no TOML is provided, skip VCFANNO and just pass the input VCFs to output + ch_annotated_vcfs = ch_vcfs + } emit: vcfs = ch_annotated_vcfs // channel: [ val(meta), path(vcf), path(tbi) ] - versions = ch_versions } @@ -70,7 +61,7 @@ def create_vcfanno_toml(vcfanno_resources, input_vcfanno_toml, List vcfann if (postannotation != []){ output.add(postannotation) } - return output.flatten() + return output.findAll { value -> value != null }.flatten() } def parse_toml(tomls) { diff --git a/subworkflows/local/vcf_annotate_vep_annotsv/main.nf b/subworkflows/local/vcf_annotate_vep_annotsv/main.nf index 724e09ec..676ba966 100644 --- a/subworkflows/local/vcf_annotate_vep_annotsv/main.nf +++ b/subworkflows/local/vcf_annotate_vep_annotsv/main.nf @@ -1,6 +1,8 @@ // // Annotate the VCFs // +// Deprecated subworkflow: keep this in the pipeline until a final decision is made on whether to keep using AnnotSV +// include { BCFTOOLS_SPLIT_BY_SVTYPE } from '../../../modules/local/bcftools/split_by_svtype' include { BCFTOOLS_CONSENSUS_REHEADER } from '../../../modules/local/bcftools/consensus_reheader' diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml b/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml deleted file mode 100644 index f8476112..00000000 --- a/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -subworkflows/utils_nextflow_pipeline: - - subworkflows/nf-core/utils_nextflow_pipeline/** diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml b/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml deleted file mode 100644 index ac8523c9..00000000 
--- a/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -subworkflows/utils_nfcore_pipeline: - - subworkflows/nf-core/utils_nfcore_pipeline/** diff --git a/tests/nextflow.config b/tests/nextflow.config index 2751dfd5..9c8a10a6 100644 --- a/tests/nextflow.config +++ b/tests/nextflow.config @@ -14,6 +14,8 @@ params { config_profile_description = 'Configurations for running nf-test' input = "${baseDir}/tests/samplesheet.csv" + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/' + modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/' // References for test data fasta = "https://github.com/nf-cmgg/test-datasets/raw/main/data/genomics/homo_sapiens/genome/seq/SVcontrol/reference.fasta" diff --git a/tests/subworkflows/local/vcf_annotate/main.nf.test b/tests/subworkflows/local/vcf_annotate/main.nf.test new file mode 100644 index 00000000..54c8b88f --- /dev/null +++ b/tests/subworkflows/local/vcf_annotate/main.nf.test @@ -0,0 +1,353 @@ +nextflow_workflow { + + name "Test Workflow VCF_ANNOTATE" + script "subworkflows/local/vcf_annotate/main.nf" + workflow "VCF_ANNOTATE" + + tag "subworkflows" + tag "subworkflows_local" + tag "vcf_annotate" + + test("homo_sapiens - all") { + + options "-stub" + + when { + params { + genome = "GRCh38" + species = "homo_sapiens" + vep_cache_version = 110 + } + workflow { + """ + def meta = [id:"test", sample:"test", sex:"male", family: "test1", family_count:1, hpo:[]] + input[0] = Channel.of([ + meta + [variant_type:"sv"], + file(params.sv_vcf1, checkIfExists:true), + file(params.sv_tbi1, checkIfExists:true) + ]) // vcfs + input[1] = Channel.value([ + [id:"fasta"], + file(params.fasta, checkIfExists: true) + ]) // fasta + input[2] = Channel.value([ + [id:"fai"], + file(params.fai, checkIfExists: true) + ]) // fai + input[3] = Channel.value([ + [id:"dict"], + file(params.dict, checkIfExists: true) + ]) // dict + input[4] = Channel.value([ 
+ [id:"gtf"], + file(params.gtf, checkIfExists: true) + ]) // gtf + input[5] = [] // vep_cache + input[6] = [] // vep_extra_files + input[7] = [] // vcfanno_lua + input[8] = [] // vcfanno_toml + input[9] = [[:], []] // strvctvre_phylop + input[10] = [[:], []] // strvctvre_data + input[11] = params.genome + input[12] = params.species + input[13] = params.vep_cache_version + input[14] = [] // vcfanno_resources + input[15] = file("${baseDir}/assets/vcfanno/*.toml") // vcfanno_default_tomls + input[16] = ["all"] // tools + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.collectEntries { key, value -> + if(key.matches("^\\d+\$")) { + return + } + if(key == "versions") { + return [ key, value.collect { path(it).yaml } ] + } + return [ key, value ] + } + ).match() } + ) + } + + } + + test("homo_sapiens - vep") { + + options "-stub" + + when { + params { + genome = "GRCh38" + species = "homo_sapiens" + vep_cache_version = 110 + } + workflow { + """ + def meta = [id:"test", sample:"test", sex:"male", family: "test1", family_count:1, hpo:[]] + input[0] = Channel.of([ + meta + [variant_type:"sv"], + file(params.sv_vcf1, checkIfExists:true), + file(params.sv_tbi1, checkIfExists:true) + ]) // vcfs + input[1] = Channel.value([ + [id:"fasta"], + file(params.fasta, checkIfExists: true) + ]) // fasta + input[2] = Channel.value([ + [id:"fai"], + file(params.fai, checkIfExists: true) + ]) // fai + input[3] = Channel.value([ + [id:"dict"], + file(params.dict, checkIfExists: true) + ]) // dict + input[4] = Channel.value([ + [id:"gtf"], + file(params.gtf, checkIfExists: true) + ]) // gtf + input[5] = [] // vep_cache + input[6] = [] // vep_extra_files + input[7] = [] // vcfanno_lua + input[8] = [] // vcfanno_toml + input[9] = [[:], []] // strvctvre_phylop + input[10] = [[:], []] // strvctvre_data + input[11] = params.genome + input[12] = params.species + input[13] = params.vep_cache_version + input[14] = [] // vcfanno_resources + 
input[15] = file("${baseDir}/assets/vcfanno/*.toml") // vcfanno_default_tomls + input[16] = ["vep"] // tools + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.collectEntries { key, value -> + if(key.matches("^\\d+\$")) { + return + } + if(key == "versions") { + return [ key, value.collect { path(it).yaml } ] + } + return [ key, value ] + } + ).match() } + ) + } + + } + + test("homo_sapiens - vcfanno") { // NOTE(review): the only case without "-stub"; its recorded snapshot holds the untouched input VCF/TBI and an empty versions list, so VCFANNO does not appear to run with these inputs - confirm the vcfanno path is actually covered + + when { + params { + genome = "GRCh38" + species = "homo_sapiens" + vep_cache_version = 110 + } + workflow { + """ + def meta = [id:"test", sample:"test", sex:"male", family: "test1", family_count:1, hpo:[]] + input[0] = Channel.of([ + meta + [variant_type:"sv"], + file(params.sv_vcf1, checkIfExists:true), + file(params.sv_tbi1, checkIfExists:true) + ]) // vcfs + input[1] = Channel.value([ + [id:"fasta"], + file(params.fasta, checkIfExists: true) + ]) // fasta + input[2] = Channel.value([ + [id:"fai"], + file(params.fai, checkIfExists: true) + ]) // fai + input[3] = Channel.value([ + [id:"dict"], + file(params.dict, checkIfExists: true) + ]) // dict + input[4] = Channel.value([ + [id:"gtf"], + file(params.gtf, checkIfExists: true) + ]) // gtf + input[5] = [] // vep_cache + input[6] = [] // vep_extra_files + input[7] = [] // vcfanno_lua + input[8] = [] // vcfanno_toml + input[9] = [[:], []] // strvctvre_phylop + input[10] = [[:], []] // strvctvre_data + input[11] = params.genome + input[12] = params.species + input[13] = params.vep_cache_version + input[14] = [] // vcfanno_resources + input[15] = file("${baseDir}/assets/vcfanno/*.toml") // vcfanno_default_tomls + input[16] = ["vcfanno"] // tools + """ + } + } + + then { // snapshot every named output; numeric channel keys are skipped and versions files are compared as parsed YAML + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.collectEntries { key, value -> + if(key.matches("^\\d+\$")) { + return + } + if(key == "versions") { + return [ key, value.collect { path(it).yaml } ] + } + return [ key, value ] + } + ).match() } + ) + } + + } + + 
test("homo_sapiens - svannotate") { + + options "-stub" + + when { + params { + genome = "GRCh38" + species = "homo_sapiens" + vep_cache_version = 110 + } + workflow { + """ + def meta = [id:"test", sample:"test", sex:"male", family: "test1", family_count:1, hpo:[]] + input[0] = Channel.of([ + meta + [variant_type:"sv"], + file(params.sv_vcf1, checkIfExists:true), + file(params.sv_tbi1, checkIfExists:true) + ]) // vcfs + input[1] = Channel.value([ + [id:"fasta"], + file(params.fasta, checkIfExists: true) + ]) // fasta + input[2] = Channel.value([ + [id:"fai"], + file(params.fai, checkIfExists: true) + ]) // fai + input[3] = Channel.value([ + [id:"dict"], + file(params.dict, checkIfExists: true) + ]) // dict + input[4] = Channel.value([ + [id:"gtf"], + file(params.gtf, checkIfExists: true) + ]) // gtf + input[5] = [] // vep_cache + input[6] = [] // vep_extra_files + input[7] = [] // vcfanno_lua + input[8] = [] // vcfanno_toml + input[9] = [[:], []] // strvctvre_phylop + input[10] = [[:], []] // strvctvre_data + input[11] = params.genome + input[12] = params.species + input[13] = params.vep_cache_version + input[14] = [] // vcfanno_resources + input[15] = file("${baseDir}/assets/vcfanno/*.toml") // vcfanno_default_tomls + input[16] = ["svannotate"] // tools + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.collectEntries { key, value -> + if(key.matches("^\\d+\$")) { + return + } + if(key == "versions") { + return [ key, value.collect { path(it).yaml } ] + } + return [ key, value ] + } + ).match() } + ) + } + + } + + test("homo_sapiens - strvctvre") { + + options "-stub" + + when { + params { + genome = "GRCh38" + species = "homo_sapiens" + vep_cache_version = 110 + } + workflow { + """ + def meta = [id:"test", sample:"test", sex:"male", family: "test1", family_count:1, hpo:[]] + input[0] = Channel.of([ + meta + [variant_type:"sv"], + file(params.sv_vcf1, checkIfExists:true), + file(params.sv_tbi1, 
checkIfExists:true) + ]) // vcfs + input[1] = Channel.value([ + [id:"fasta"], + file(params.fasta, checkIfExists: true) + ]) // fasta + input[2] = Channel.value([ + [id:"fai"], + file(params.fai, checkIfExists: true) + ]) // fai + input[3] = Channel.value([ + [id:"dict"], + file(params.dict, checkIfExists: true) + ]) // dict + input[4] = Channel.value([ + [id:"gtf"], + file(params.gtf, checkIfExists: true) + ]) // gtf + input[5] = [] // vep_cache + input[6] = [] // vep_extra_files + input[7] = [] // vcfanno_lua + input[8] = [] // vcfanno_toml + input[9] = [[:], []] // strvctvre_phylop + input[10] = [[:], []] // strvctvre_data + input[11] = params.genome + input[12] = params.species + input[13] = params.vep_cache_version + input[14] = [] // vcfanno_resources + input[15] = file("${baseDir}/assets/vcfanno/*.toml") // vcfanno_default_tomls + input[16] = ["strvctvre"] // tools + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.collectEntries { key, value -> + if(key.matches("^\\d+\$")) { + return + } + if(key == "versions") { + return [ key, value.collect { path(it).yaml } ] + } + return [ key, value ] + } + ).match() } + ) + } + + } +} diff --git a/tests/subworkflows/local/vcf_annotate/main.nf.test.snap b/tests/subworkflows/local/vcf_annotate/main.nf.test.snap new file mode 100644 index 00000000..a271411c --- /dev/null +++ b/tests/subworkflows/local/vcf_annotate/main.nf.test.snap @@ -0,0 +1,210 @@ +{ + "homo_sapiens - strvctvre": { + "content": [ + { + "reports": [ + + ], + "vcfs": [ + [ + { + "id": "test", + "sample": "test", + "sex": "male", + "family": "test1", + "family_count": 1, + "hpo": [ + + ], + "variant_type": "sv" + }, + "test.sv.strvctvre.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test.sv.strvctvre.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + { + "VCF_ANNOTATE:TABIX_BGZIPTABIX": { + "tabix": "1.19.1" + } + }, + { + "VCF_ANNOTATE:STRVCTVRE_STRVCTVRE": { + "strvctvre": 
"1.10" + } + } + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-09-18T16:39:18.734993799" + }, + "homo_sapiens - svannotate": { + "content": [ + { + "reports": [ + + ], + "vcfs": [ + [ + { + "id": "test", + "sample": "test", + "sex": "male", + "family": "test1", + "family_count": 1, + "hpo": [ + + ], + "variant_type": "sv" + }, + "test.sv.svannotate.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test.sv.svannotate.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + { + "VCF_ANNOTATE:GATK4_SVANNOTATE": { + "gatk4": "4.6.1.0" + } + } + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-09-18T16:37:20.479982511" + }, + "homo_sapiens - vcfanno": { + "content": [ + { + "reports": [ + + ], + "vcfs": [ + [ + { + "id": "test", + "sample": "test", + "sex": "male", + "family": "test1", + "family_count": 1, + "hpo": [ + + ], + "variant_type": "sv" + }, + "/nf-cmgg/test-datasets/raw/main/data/genomics/homo_sapiens/illumina/vcf/SVcontrol/sv/PosCon1.sv.vcf.gz", + "/nf-cmgg/test-datasets/raw/main/data/genomics/homo_sapiens/illumina/vcf/SVcontrol/sv/PosCon1.sv.vcf.gz.tbi" + ] + ], + "versions": [ + + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-10-07T16:43:39.153654599" + }, + "homo_sapiens - all": { + "content": [ + { + "reports": [ + "test.vep_summary.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "vcfs": [ + [ + { + "id": "test", + "sample": "test", + "sex": "male", + "family": "test1", + "family_count": 1, + "hpo": [ + + ], + "variant_type": "sv" + }, + "test.sv.strvctvre.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test.sv.strvctvre.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + { + "VCF_ANNOTATE:TABIX_BGZIPTABIX": { + "tabix": "1.19.1" + } + }, + { + "VCF_ANNOTATE:ENSEMBLVEP_VEP": { + "ensemblvep": 115.1, + "tabix": 1.21 + } + }, + { + "VCF_ANNOTATE:GATK4_SVANNOTATE": { + 
"gatk4": "4.6.1.0" + } + }, + { + "VCF_ANNOTATE:STRVCTVRE_STRVCTVRE": { + "strvctvre": "1.10" + } + } + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-10-07T16:48:37.822101668" + }, + "homo_sapiens - vep": { + "content": [ + { + "reports": [ + "test.vep_summary.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "vcfs": [ + [ + { + "id": "test", + "sample": "test", + "sex": "male", + "family": "test1", + "family_count": 1, + "hpo": [ + + ], + "variant_type": "sv" + }, + "test.vep.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test.vep.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + { + "VCF_ANNOTATE:ENSEMBLVEP_VEP": { + "ensemblvep": 115.1, + "tabix": 1.21 + } + } + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-09-18T16:35:38.9355384" + } +} \ No newline at end of file diff --git a/tests/subworkflows/local/vcf_annotate_vcfanno/main.nf.test b/tests/subworkflows/local/vcf_annotate_vcfanno/main.nf.test index 544cf056..c2e594af 100644 --- a/tests/subworkflows/local/vcf_annotate_vcfanno/main.nf.test +++ b/tests/subworkflows/local/vcf_annotate_vcfanno/main.nf.test @@ -19,18 +19,13 @@ nextflow_workflow { input[0] = Channel.of([ meta, file(params.sv_vcf1, checkIfExists:true), - file(params.sv_tbi1, checkIfExists:true) - ]) - input[1] = Channel.of([ - meta, - [], + file(params.sv_tbi1, checkIfExists:true), [] ]) + input[1] = [] input[2] = [] input[3] = [] - input[4] = [] - input[5] = files("\${params.vcfanno_tomls}/*.toml", checkIfExists:true) - input[6] = true + input[4] = files("\${params.vcfanno_tomls}/*.toml", checkIfExists:true) """ } } @@ -48,47 +43,5 @@ nextflow_workflow { ).match() } ) } - - } - - test("homo_sapiens - no_annotate") { - - when { - workflow { - """ - def meta = [id:"test", sample:"test", sex:"male", family: "test1", family_count:1, hpo:[], variant_type:'sv'] - input[0] = Channel.of([ - meta, - file(params.sv_vcf1, 
checkIfExists:true), - file(params.sv_tbi1, checkIfExists:true) - ]) - input[1] = Channel.of([ - meta, - [], - [] - ]) - input[2] = [] - input[3] = [] - input[4] = files("\${params.vcfanno_tomls}/*.toml", checkIfExists:true)[0] - input[5] = files("\${params.vcfanno_tomls}/*.toml", checkIfExists:true) - input[6] = false - """ - } - } - - then { - assertAll( - { assert workflow.success }, - { assert snapshot( - workflow.out.collectEntries { key, value -> - if(key.matches("^\\d+\$") || key.matches('versions')) { - return null - } - return [ key, value ] - }.findAll { it != null } - ).match() } - ) - } - } } diff --git a/tests/subworkflows/local/vcf_annotate_vcfanno/main.nf.test.snap b/tests/subworkflows/local/vcf_annotate_vcfanno/main.nf.test.snap index 1887b614..27f102ff 100644 --- a/tests/subworkflows/local/vcf_annotate_vcfanno/main.nf.test.snap +++ b/tests/subworkflows/local/vcf_annotate_vcfanno/main.nf.test.snap @@ -1,32 +1,4 @@ { - "homo_sapiens - no_annotate": { - "content": [ - { - "vcfs": [ - [ - { - "id": "test", - "sample": "test", - "sex": "male", - "family": "test1", - "family_count": 1, - "hpo": [ - - ], - "variant_type": "sv" - }, - "test.sv.vcfanno.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", - "test.sv.vcfanno.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ] - } - ], - "meta": { - "nf-test": "0.9.1", - "nextflow": "24.10.4" - }, - "timestamp": "2025-02-12T14:07:01.425728203" - }, "homo_sapiens - annotate": { "content": [ { @@ -43,16 +15,19 @@ ], "variant_type": "sv" }, - "test.sv.vcfanno.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", - "test.sv.vcfanno.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + "/nf-cmgg/test-datasets/raw/main/data/genomics/homo_sapiens/illumina/vcf/SVcontrol/sv/PosCon1.sv.vcf.gz", + "/nf-cmgg/test-datasets/raw/main/data/genomics/homo_sapiens/illumina/vcf/SVcontrol/sv/PosCon1.sv.vcf.gz.tbi", + [ + + ] ] ] } ], "meta": { - "nf-test": "0.9.1", - "nextflow": "24.10.4" + "nf-test": "0.9.2", + "nextflow": "25.04.6" }, - 
"timestamp": "2025-02-12T14:06:51.583328891" + "timestamp": "2025-10-06T15:01:50.669942067" } } \ No newline at end of file diff --git a/tests/subworkflows/local/vcf_annotate_vep_annotsv/main.nf.test b/tests/subworkflows/local/vcf_annotate_vep_annotsv/main.nf.test.disabled similarity index 100% rename from tests/subworkflows/local/vcf_annotate_vep_annotsv/main.nf.test rename to tests/subworkflows/local/vcf_annotate_vep_annotsv/main.nf.test.disabled diff --git a/workflows/structural.nf b/workflows/structural.nf index ac347262..61dfbd3e 100644 --- a/workflows/structural.nf +++ b/workflows/structural.nf @@ -24,6 +24,7 @@ include { BAM_CNV_CALLING } from '../subworkflows/local/ba include { VCF_ANNOTATE_VEP_ANNOTSV } from '../subworkflows/local/vcf_annotate_vep_annotsv/main' include { VCF_ANNOTATE_VCFANNO } from '../subworkflows/local/vcf_annotate_vcfanno/main' include { BAM_REPEAT_ESTIMATION_EXPANSIONHUNTER } from '../subworkflows/local/bam_repeat_estimation_expansionhunter/main' +include { VCF_ANNOTATE } from '../subworkflows/local/vcf_annotate/main' include { VCF_CONCAT_BCFTOOLS } from '../subworkflows/local/vcf_concat_bcftools/main' include { VCF_MERGE_FAMILY_JASMINE } from '../subworkflows/local/vcf_merge_family_jasmine/main' @@ -76,9 +77,9 @@ workflow STRUCTURAL { qdnaseq_male // The QDNAseq annotations for male samples wisecondorx_reference // The WisecondorX annotations file vep_cache // The VEP cache directory - annotsv_annotations // The annotations directory for AnnotSV - annotsv_candidate_genes // A file containing the AnnotSV candidate genes - annotsv_gene_transcripts // A file containing the AnnotSV gene transcripts + // annotsv_annotations // The annotations directory for AnnotSV + // annotsv_candidate_genes // A file containing the AnnotSV candidate genes + // annotsv_gene_transcripts // A file containing the AnnotSV gene transcripts vcfanno_lua // A Lua script to use with vcfanno vcfanno_resources // A comma delimited list of paths to vcfanno resource 
files vcfanno_toml // The vcfanno config to @@ -87,6 +88,8 @@ workflow STRUCTURAL { svync_dir // A directory containing svync configs (they must end in '.yaml' and contain the full name of the caller) bedgovcf_dir // A directory containing bedgovcf configs (they must end in '.yaml' and contain the full name of the caller) vcfanno_default_dir // A directory containing the default vcfanno configs (they must end in '.toml') + strvctvre_phylop // A bigwig file containing the phylop reference for StrVCTVRE + strvctvre_data // A directory containing the reference data for StrVCTVRE // boolean inputs annotate // Run annotation on SV and CNV data @@ -103,6 +106,7 @@ workflow STRUCTURAL { vep_cache_version // The version of the VEP cache to use filter // The filter pattern to use after annotation outdir // The output directory of the pipeline + annotate_tools // The tools to be used for annotation main: @@ -118,8 +122,8 @@ workflow STRUCTURAL { // def ch_fasta = Channel.fromPath(fasta).collect { fasta_file -> [[id:'fasta'], fasta_file ] } - def ch_annotsv_candidate_genes = annotsv_candidate_genes ? Channel.fromPath(annotsv_candidate_genes).collect { genes_file -> [[], genes_file] } : [[],[]] - def ch_annotsv_gene_transcripts = annotsv_gene_transcripts ? Channel.fromPath(annotsv_gene_transcripts).collect { transcripts_file -> [[], transcripts_file] } : [[],[]] + // def ch_annotsv_candidate_genes = annotsv_candidate_genes ? Channel.fromPath(annotsv_candidate_genes).collect { genes_file -> [[], genes_file] } : [[],[]] + // def ch_annotsv_gene_transcripts = annotsv_gene_transcripts ? Channel.fromPath(annotsv_gene_transcripts).collect { transcripts_file -> [[], transcripts_file] } : [[],[]] def ch_vcfanno_lua = vcfanno_lua ? Channel.fromPath(vcfanno_lua).collect() : [] def ch_catalog = expansionhunter_catalog ? Channel.fromPath(expansionhunter_catalog).collect { catalog_file -> [[id:'catalog'], catalog_file] } : [[],[]] def ch_qdnaseq_male = qdnaseq_male ? 
Channel.fromPath(qdnaseq_male).collect { qdnaseq_file -> [[id:'qdnaseq_male'], qdnaseq_file] } : [[],[]] @@ -129,6 +133,8 @@ workflow STRUCTURAL { def ch_manta_config = manta_config ? Channel.fromPath(manta_config).collect() : [[]] def ch_svync_configs = svync_dir ? Channel.fromPath("${svync_dir}/*.yaml", checkIfExists:true).collect() : [] def ch_bedgovcf_configs = bedgovcf_dir ? Channel.fromPath("${bedgovcf_dir}/*.yaml", checkIfExists:true).collect() : [] + def ch_strvctvre_phylop = strvctvre_phylop ? Channel.value([[id:'phylop'], file(strvctvre_phylop)]) : [[:],[]] + def ch_strvctvre_data = strvctvre_data ? Channel.value([[id:'strvctvre_data'], file(strvctvre_data)]) : [[:],[]] def val_vcfanno_resources = vcfanno_resources ? vcfanno_resources.split(",").collect { resource_file -> file(resource_file, checkIfExists:true) }.flatten() : [] def val_default_vcfanno_tomls = vcfanno_default_dir ? files("${vcfanno_default_dir}/*.toml", checkIfExists:true) : [] @@ -187,6 +193,10 @@ workflow STRUCTURAL { error("Can't create BEDPE files from VCFs that contains repeat expansions. Don't specify either --concat_output or omit all repeat callers from the --callers parameter.") } + if (annotate && (annotate_tools.contains("svannotate") || annotate_tools.contains("all")) && !gtf) { + error("The GTF file is required when using SVAnnotate. 
Please provide it using the 'gtf' parameter.") + } + // // Create optional inputs // @@ -258,28 +268,28 @@ workflow STRUCTURAL { // ch_bwa_index = Channel.empty() // } - def ch_annotsv_annotations = Channel.empty() - if(annotate && !annotsv_annotations && callers.intersect(annotationCallers)) { - ANNOTSV_INSTALLANNOTATIONS() - ch_versions = ch_versions.mix(ANNOTSV_INSTALLANNOTATIONS.out.versions) + // def ch_annotsv_annotations = Channel.empty() + // if(annotate && !annotsv_annotations && callers.intersect(annotationCallers)) { + // ANNOTSV_INSTALLANNOTATIONS() + // ch_versions = ch_versions.mix(ANNOTSV_INSTALLANNOTATIONS.out.versions) - ch_annotsv_annotations = ANNOTSV_INSTALLANNOTATIONS.out.annotations - .collect { annotations -> [[id:"annotsv_annotations"], annotations] } - } - else if(annotate && callers.intersect(annotationCallers)) { - ch_annotsv_annotations_input = Channel.fromPath(annotsv_annotations).collect { annotations -> [[id:"annotsv_annotations"], annotations] } - if(annotsv_annotations.endsWith(".tar.gz")){ - UNTAR_ANNOTSV( - ch_annotsv_annotations_input - ) - ch_versions = ch_versions.mix(UNTAR_ANNOTSV.out.versions) - - ch_annotsv_annotations = UNTAR_ANNOTSV.out.untar - .collect() - } else { - ch_annotsv_annotations = Channel.fromPath(annotsv_annotations).collect { annotations -> [[id:"annotsv_annotations"], annotations] } - } - } + // ch_annotsv_annotations = ANNOTSV_INSTALLANNOTATIONS.out.annotations + // .collect { annotations -> [[id:"annotsv_annotations"], annotations] } + // } + // else if(annotate && callers.intersect(annotationCallers)) { + // ch_annotsv_annotations_input = Channel.fromPath(annotsv_annotations).collect { annotations -> [[id:"annotsv_annotations"], annotations] } + // if(annotsv_annotations.endsWith(".tar.gz")){ + // UNTAR_ANNOTSV( + // ch_annotsv_annotations_input + // ) + // ch_versions = ch_versions.mix(UNTAR_ANNOTSV.out.versions) + + // ch_annotsv_annotations = UNTAR_ANNOTSV.out.untar + // .collect() + // } else { + // 
ch_annotsv_annotations = Channel.fromPath(annotsv_annotations).collect { annotations -> [[id:"annotsv_annotations"], annotations] } + // } + // } def ch_vep_cache = Channel.empty() if(!vep_cache && annotate && callers.intersect(annotationCallers)) { @@ -422,78 +432,39 @@ workflow STRUCTURAL { // Annotate the variants // - def ch_annotation_output = Channel.empty() - def ch_annotsv_vcfs = Channel.empty() - if(annotate) { - VCF_ANNOTATE_VEP_ANNOTSV( + def ch_annotation_output = ch_annotation_input + if(annotate && annotate_tools.size() > 0) { + VCF_ANNOTATE( ch_annotation_input, - ch_inputs.small_variants, ch_fasta, - ch_annotsv_annotations, - ch_annotsv_candidate_genes, - ch_annotsv_gene_transcripts, + ch_fai, + ch_dict, + ch_preprocessed_gtf, ch_vep_cache, ch_vep_extra_files, - variant_types, + ch_vcfanno_lua, + val_vcfanno_toml, + ch_strvctvre_phylop, + ch_strvctvre_data, genome, species, - vep_cache_version - ) - - ch_reports = ch_reports.mix(VCF_ANNOTATE_VEP_ANNOTSV.out.reports) - ch_versions = ch_versions.mix(VCF_ANNOTATE_VEP_ANNOTSV.out.versions) - ch_annotation_output = VCF_ANNOTATE_VEP_ANNOTSV.out.vep_vcfs - ch_annotsv_vcfs = VCF_ANNOTATE_VEP_ANNOTSV.out.annotsv_vcfs - } else { - ch_annotation_output = ch_annotation_input - } - - def ch_vcfanno_output = Channel.empty() - if(annotate || vcfanno_toml) { - VCF_ANNOTATE_VCFANNO( - ch_annotation_output, - ch_annotsv_vcfs, - ch_vcfanno_lua, + vep_cache_version, val_vcfanno_resources, - val_vcfanno_toml, val_default_vcfanno_tomls, - annotate + annotate_tools ) - ch_versions = ch_versions.mix(VCF_ANNOTATE_VCFANNO.out.versions) - ch_vcfanno_output = VCF_ANNOTATE_VCFANNO.out.vcfs - } else { - ch_vcfanno_output = ch_annotation_input + ch_versions = ch_versions.mix(VCF_ANNOTATE.out.versions) + ch_reports = ch_reports.mix(VCF_ANNOTATE.out.reports) + ch_annotation_output = VCF_ANNOTATE.out.vcfs } - def ch_filter_outputs = Channel.empty() + def ch_outputs = ch_annotation_output if(filter) { BCFTOOLS_FILTER( - 
ch_vcfanno_output + ch_annotation_output ) - ch_filter_outputs = BCFTOOLS_FILTER.out.vcf.join(BCFTOOLS_FILTER.out.tbi, failOnMismatch:true, failOnDuplicate:true) + ch_outputs = BCFTOOLS_FILTER.out.vcf.join(BCFTOOLS_FILTER.out.tbi, failOnMismatch:true, failOnDuplicate:true) ch_versions = ch_versions.mix(BCFTOOLS_FILTER.out.versions) - } else { - ch_filter_outputs = ch_vcfanno_output - } - - def ch_outputs = Channel.empty() - if(gtf) { - def ch_svannotate_input = ch_filter_outputs - .map { meta, vcf, tbi -> - [ meta, vcf, tbi, [], [] ] // TODO add BED files - } - - GATK4_SVANNOTATE( - ch_svannotate_input, - ch_fasta, - ch_fai, - ch_dict, - ch_preprocessed_gtf - ) - ch_versions = ch_versions.mix(GATK4_SVANNOTATE.out.versions.first()) - ch_outputs = ch_outputs.mix(GATK4_SVANNOTATE.out.vcf.join(GATK4_SVANNOTATE.out.tbi, failOnMismatch:true, failOnDuplicate:true)) - } else { - ch_outputs = ch_outputs.mix(ch_filter_outputs) } //