From ba41df6b5707df4b59b9e9ad775daeda959ba381 Mon Sep 17 00:00:00 2001
From: Jorisvansteenbrugge
 <7196110+Jorisvansteenbrugge@users.noreply.github.com>
Date: Wed, 18 Feb 2026 13:33:43 +0100
Subject: [PATCH 1/5] add fusioninspector to fusion subworkflow

---
 conf/modules.config                           |  16 ++
 modules.json                                  |   5 +
 .../nf-core/fusioninspector/environment.yml   |  12 ++
 modules/nf-core/fusioninspector/main.nf       |  66 ++++++
 modules/nf-core/fusioninspector/meta.yml      | 150 +++++++++++++
 .../fusioninspector/tests/main.nf.test        | 111 ++++++++++
 .../fusioninspector/tests/main.nf.test.snap   | 199 ++++++++++++++++++
 .../fusioninspector/tests/nextflow.config     |   5 +
 nextflow.config                               |   1 +
 subworkflows/local/bam_gene_fusion/main.nf    |  26 ++-
 .../local/bam_gene_fusion/tests/main.nf.test  |  14 +-
 .../fastq_trim_filter_align_dedup/main.nf     |  18 +-
 workflows/dxnextflowrna.nf                    |   2 +-
 13 files changed, 606 insertions(+), 19 deletions(-)
 create mode 100644 modules/nf-core/fusioninspector/environment.yml
 create mode 100644 modules/nf-core/fusioninspector/main.nf
 create mode 100644 modules/nf-core/fusioninspector/meta.yml
 create mode 100644 modules/nf-core/fusioninspector/tests/main.nf.test
 create mode 100644 modules/nf-core/fusioninspector/tests/main.nf.test.snap
 create mode 100644 modules/nf-core/fusioninspector/tests/nextflow.config

diff --git a/conf/modules.config b/conf/modules.config
index f309638..9fd1ae5 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -46,6 +46,22 @@ process {
         time   = { 10.m + (1.ms * reads.sum { it.size() } / reads.count { it } / 1000) * task.attempt }
     }
 
+    withName: FUSIONINSPECTOR {
+        cpus   = { 12 }
+        memory = { 40.GB * task.attempt }
+        time   = { 4.h * task.attempt }
+
+        // Forward --limitSjdbInsertNsj (wrapped in --STAR_xtra_params) only when the param is not 1000000.
+        ext.args = { params.fusioninspector_limitSjdbInsertNsj != 1000000 ? "--STAR_xtra_params \"--limitSjdbInsertNsj ${params.fusioninspector_limitSjdbInsertNsj}\"" : '' }
+        ext.args2 = '--annotate --examine_coding_effect'
+        publishDir = [
+            path: { "${params.outdir}/fusioninspector/${meta.id}" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename } // null for versions.yml, so it is not published
+        ]
+    }
+    
+
     withName: MULTIQC {
         clusterOptions = "${params.cluster_options}"
         ext.args       = { params.multiqc_title ? "--title \"${params.multiqc_title}\"" : "--title \"${params.analysis_id}\"" }
diff --git a/modules.json b/modules.json
index d231cd1..fcd52fc 100644
--- a/modules.json
+++ b/modules.json
@@ -15,6 +15,11 @@
                         "git_sha": "dc94b6ee04a05ddb9f7ae050712ff30a13149164",
                         "installed_by": ["modules"]
                     },
+                    "fusioninspector": {
+                        "branch": "master",
+                        "git_sha": "310a7a59c7f2362d25070e5928f3139f92377eaf",
+                        "installed_by": ["modules"]
+                    },
                     "fusionreport/detect": {
                         "branch": "master",
                         "git_sha": "e753770db613ce014b3c4bc94f6cba443427b726",
diff --git a/modules/nf-core/fusioninspector/environment.yml b/modules/nf-core/fusioninspector/environment.yml
new file mode 100644
index 0000000..e0e8ab3
--- /dev/null
+++ b/modules/nf-core/fusioninspector/environment.yml
@@ -0,0 +1,12 @@
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - bioconda::fusion-inspector=2.10.0
+  - bioconda::igv-reports=1.14.1
+  - bioconda::perl-json-xs=4.03
+  - bioconda::pysam=0.22.1
+  - conda-forge::perl-carp-assert=0.21
+  - conda-forge::pip==24.3.1
+  - pip:
+      - intervaltree==3.1.0
diff --git a/modules/nf-core/fusioninspector/main.nf b/modules/nf-core/fusioninspector/main.nf
new file mode 100644
index 0000000..5e25466
--- /dev/null
+++ b/modules/nf-core/fusioninspector/main.nf
@@ -0,0 +1,66 @@
+process FUSIONINSPECTOR {
+    tag "$meta.id"
+    label 'process_high'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/13/139b94a1f10c6e23a8c27eaed1e5a689db978a513d0ee155e74d35f0970814fe/data' :
+        'community.wave.seqera.io/library/fusion-inspector_igv-reports_perl-json-xs_pysam_pruned:c6147971d107ab11'}"
+
+    input:
+    tuple val(meta), path(reads), path(fusion_list)
+    tuple val(meta2), path(reference)
+
+    output:
+    tuple val(meta), path("*FusionInspector.fusions.tsv"), emit: tsv         , optional:true
+    tuple val(meta), path("fi_workdir/*.gtf")            , emit: out_gtf     , optional:true
+    tuple val(meta), path("*FusionInspector.log")        , emit: log         , optional:true
+    tuple val(meta), path("*html")                       , emit: html        , optional:true
+    tuple val(meta), path("*abridged.tsv")               , emit: abridged_tsv, optional:true
+    tuple val(meta), path("IGV_inputs")                  , emit: igv_inputs  , optional:true
+    tuple val(meta), path("fi_workdir")                  , emit: fi_workdir  , optional:true
+    tuple val(meta), path("chckpts_dir")                 , emit: chckpts_dir , optional:true
+    path  "versions.yml"                                 , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def fasta  = meta.single_end ? "--left_fq ${reads[0]}" : "--left_fq ${reads[0]} --right_fq ${reads[1]}"
+    def args   = task.ext.args   ?: ''
+    def args2  = task.ext.args2  ?: ''
+    """
+    FusionInspector \\
+        --fusions $fusion_list \\
+        --genome_lib ${reference} \\
+        $fasta \\
+        --CPU ${task.cpus} \\
+        -O . \\
+        --out_prefix $prefix \\
+        --vis $args $args2
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        FusionInspector: \$(FusionInspector --version 2>&1 | grep -i 'version' | sed -e 's/FusionInspector version: //' -e 's/[[:space:]]//g')
+    END_VERSIONS
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch FusionInspector.log
+    touch ${prefix}.FusionInspector.fusions.abridged.tsv
+    touch ${prefix}.FusionInspector.fusions.tsv
+    touch ${prefix}.fusion_inspector_web.html
+    mkdir -p chckpts_dir
+    mkdir -p fi_workdir
+    touch fi_workdir/${prefix}.gtf
+    mkdir -p IGV_inputs
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        FusionInspector: \$(FusionInspector --version 2>&1 | grep -i 'version' | sed -e 's/FusionInspector version: //' -e 's/[[:space:]]//g')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/fusioninspector/meta.yml b/modules/nf-core/fusioninspector/meta.yml
new file mode 100644
index 0000000..1e94eee
--- /dev/null
+++ b/modules/nf-core/fusioninspector/meta.yml
@@ -0,0 +1,150 @@
+name: fusioninspector
+description: Validation of Fusion Transcript Predictions
+keywords:
+  - fusioninspector
+  - fusion
+  - RNA-seq
+  - fastq
+tools:
+  - fusioninspector:
+      description: Validation of Fusion Transcript Predictions
+      homepage: https://github.com/FusionInspector/FusionInspector
+      documentation: https://github.com/FusionInspector/FusionInspector/wiki
+      tool_dev_url: https://github.com/FusionInspector/FusionInspector
+      doi: "10.1101/2021.08.02.454639"
+      licence: ["BSD-3-Clause"]
+      identifier: ""
+
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. [ id:'test', single_end:false ]
+    - reads:
+        type: file
+        description: FASTQ file
+        pattern: "*.{fastq*}"
+        ontologies:
+          - edam: "http://edamontology.org/format_1930" # FASTQ
+    - fusion_list:
+        type: file
+        description: Fusion targets list
+        pattern: "*.{txt}"
+        ontologies:
+          - edam: "http://edamontology.org/format_2330" # Textual format
+  - - meta2:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. [ id:'test', single_end:false ]
+    - reference:
+        type: directory
+        description: Path to CTAT references
+        pattern: "*"
+
+output:
+  tsv:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - "*FusionInspector.fusions.tsv":
+          type: file
+          description: FusionInspector output TSV file
+          pattern: "*.tsv"
+          ontologies:
+            - edam: "http://edamontology.org/format_3475" # TSV
+  out_gtf:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - fi_workdir/*.gtf:
+          type: file
+          description: GTF output file
+          pattern: "*.gtf"
+          ontologies:
+            - edam: "http://edamontology.org/format_2306" # GTF
+  log:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - "*FusionInspector.log":
+          type: file
+          description: FusionInspector log file
+          pattern: "*.log"
+          ontologies:
+            - edam: "http://edamontology.org/data_1678" # Log file
+            - edam: "http://edamontology.org/format_2330" # Plain text
+  html:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - "*html":
+          type: file
+          description: HTML output files
+          pattern: "*.html"
+          ontologies:
+            - edam: "http://edamontology.org/format_2331" # HTML
+  abridged_tsv:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - "*abridged.tsv":
+          type: file
+          description: Abridged TSV output file
+          pattern: "*.tsv"
+          ontologies:
+            - edam: "http://edamontology.org/format_3475" # TSV
+  igv_inputs:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - IGV_inputs:
+          type: directory
+          description: IGV inputs directory
+          pattern: "IGV_inputs"
+  fi_workdir:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - fi_workdir:
+          type: directory
+          description: FusionInspector work directory
+          pattern: "fi_workdir"
+  chckpts_dir:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - chckpts_dir:
+          type: directory
+          description: Checkpoints directory
+          pattern: "chckpts_dir"
+  versions:
+    - versions.yml:
+        type: file
+        description: File containing software versions
+        pattern: "versions.yml"
+        ontologies:
+          - edam: http://edamontology.org/format_3750 # YAML
+authors:
+  - "@rannick"
+  - "@delfiterradas"
+  - "@sofiromano"
+  - "@alanmmobbs93"
+  - "@martings"
diff --git a/modules/nf-core/fusioninspector/tests/main.nf.test b/modules/nf-core/fusioninspector/tests/main.nf.test
new file mode 100644
index 0000000..01e186f
--- /dev/null
+++ b/modules/nf-core/fusioninspector/tests/main.nf.test
@@ -0,0 +1,111 @@
+nextflow_process {
+
+    name "Test Process FUSIONINSPECTOR"
+    script "../main.nf"
+    process "FUSIONINSPECTOR"
+    tag "modules"
+    tag "modules_local"
+    tag "fusioninspector"
+    tag "starfusion/build"
+    tag "modules_nfcore"
+
+    test("FUSIONINSPECTOR - test") {
+        config './nextflow.config'
+
+        setup {
+            run("STARFUSION_BUILD") {
+                script "../../starfusion/build/main.nf"
+                process {
+                    """
+                    input[0] = [
+                        [ id:'minigenome_fasta' ],
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/minigenome.fa')
+                    ]
+                    input[1] = [
+                        [ id:'minigenome_gtf' ],
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/minigenome.gtf')
+                    ]
+
+                    input[2] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/CTAT_HumanFusionLib.mini.dat.gz')
+                    input[3] = "homo_sapiens"
+                    input[4] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/Pfam-A.hmm.gz')
+                    input[5] = [
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/test_starfusion_dfam.hmm'),
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/test_starfusion_dfam.hmm.h3f'),
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/test_starfusion_dfam.hmm.h3i'),
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/test_starfusion_dfam.hmm.h3m'),
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/test_starfusion_dfam.hmm.h3p')
+                    ]
+                    input[6] = "https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/AnnotFilterRule.pm"
+                    """
+                }
+            }
+        }
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:false ], // meta map
+                    [
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/test_starfusion_rnaseq_1.fastq.gz'),
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/test_starfusion_rnaseq_2.fastq.gz')
+                    ], // reads
+                    [
+                        file("https://github.com/FusionInspector/FusionInspector/raw/master/test/fusion_targets.A.txt")
+                    ]
+                ]
+                input[1] = STARFUSION_BUILD.out.reference
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert file(process.out.tsv[0][1]).getText().contains("THRA--AC090627.1\t72\t83\t72.00\t80.15") },
+                { assert file(process.out.abridged_tsv[0][1]).getText().contains("THRA--AC090627.1\t72\t83\t72.00\t80.15") },
+                { assert snapshot(
+                    file(process.out.tsv[0][1]).name,
+                    process.out.out_gtf,
+                    file(process.out.abridged_tsv[0][1]).name,
+                    file(process.out.log[0][1]).name,
+                    process.out.versions
+                ).match() }
+            )
+        }
+
+    }
+
+    test("FUSIONINSPECTOR - test - stub") {
+        options "-stub"
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:false ], // meta map
+                    [
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/test_starfusion_rnaseq_1.fastq.gz'),
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/test_starfusion_rnaseq_2.fastq.gz')
+                    ], // reads
+                    [
+                        file("https://github.com/FusionInspector/FusionInspector/raw/master/test/fusion_targets.A.txt")
+                    ]
+                ]
+                input[1] = [[id:'minigenome_refs'],[]]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    process.out
+                    ).match() }
+            )
+        }
+
+    }
+
+}
diff --git a/modules/nf-core/fusioninspector/tests/main.nf.test.snap b/modules/nf-core/fusioninspector/tests/main.nf.test.snap
new file mode 100644
index 0000000..4a8d000
--- /dev/null
+++ b/modules/nf-core/fusioninspector/tests/main.nf.test.snap
@@ -0,0 +1,199 @@
+{
+    "FUSIONINSPECTOR - test": {
+        "content": [
+            "test.FusionInspector.fusions.tsv",
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": false
+                    },
+                    "test.gtf:md5,d523061bd8b443a014d4cca9406ec772"
+                ]
+            ],
+            "test.FusionInspector.fusions.abridged.tsv",
+            "FusionInspector.log",
+            [
+                "versions.yml:md5,7c9694f4a2d8edbd30fd9674566b764c"
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.9.2",
+            "nextflow": "25.04.6"
+        },
+        "timestamp": "2025-07-07T13:48:15.295143368"
+    },
+    "FUSIONINSPECTOR - test - stub": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.FusionInspector.fusions.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "1": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.gtf:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "2": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "FusionInspector.log:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "3": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.fusion_inspector_web.html:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "4": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.FusionInspector.fusions.abridged.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "5": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        [
+                            
+                        ]
+                    ]
+                ],
+                "6": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        [
+                            "test.gtf:md5,d41d8cd98f00b204e9800998ecf8427e"
+                        ]
+                    ]
+                ],
+                "7": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        [
+                            
+                        ]
+                    ]
+                ],
+                "8": [
+                    "versions.yml:md5,7c9694f4a2d8edbd30fd9674566b764c"
+                ],
+                "abridged_tsv": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.FusionInspector.fusions.abridged.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "chckpts_dir": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        [
+                            
+                        ]
+                    ]
+                ],
+                "fi_workdir": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        [
+                            "test.gtf:md5,d41d8cd98f00b204e9800998ecf8427e"
+                        ]
+                    ]
+                ],
+                "html": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.fusion_inspector_web.html:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "igv_inputs": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        [
+                            
+                        ]
+                    ]
+                ],
+                "log": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "FusionInspector.log:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "out_gtf": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.gtf:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "tsv": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.FusionInspector.fusions.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,7c9694f4a2d8edbd30fd9674566b764c"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.2",
+            "nextflow": "25.04.6"
+        },
+        "timestamp": "2025-07-18T20:42:22.142466729"
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/fusioninspector/tests/nextflow.config b/modules/nf-core/fusioninspector/tests/nextflow.config
new file mode 100644
index 0000000..ca61431
--- /dev/null
+++ b/modules/nf-core/fusioninspector/tests/nextflow.config
@@ -0,0 +1,5 @@
+process {
+    withName: 'FUSIONINSPECTOR' {
+        ext.args = '--annotate --examine_coding_effect'
+    }
+}
diff --git a/nextflow.config b/nextflow.config
index f69d66c..ecff672 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -36,6 +36,7 @@ params {
     arriba_protein           = null
     arriba_known_fusions     = null
     fusion_tools_cutoff      = 1
+    fusioninspector_limitSjdbInsertNsj = 1000000
     
     // Dx_tracks references
     dx_tracks_path            = null
diff --git a/subworkflows/local/bam_gene_fusion/main.nf b/subworkflows/local/bam_gene_fusion/main.nf
index 8a905e0..b657937 100644
--- a/subworkflows/local/bam_gene_fusion/main.nf
+++ b/subworkflows/local/bam_gene_fusion/main.nf
@@ -1,15 +1,16 @@
-include { STARFUSION_DETECT   } from '../../../modules/nf-core/starfusion/detect/main'
-include { ARRIBA_ARRIBA       } from '../../../modules/nf-core/arriba/arriba/main'
-include { FUSIONREPORT_DETECT } from '../../../modules/nf-core/fusionreport/detect/main'
+include { STARFUSION_DETECT     } from '../../../modules/nf-core/starfusion/detect/main'
+include { ARRIBA_ARRIBA         } from '../../../modules/nf-core/arriba/arriba/main'
+include { FUSIONINSPECTOR       } from '../../../modules/nf-core/fusioninspector/main'
+include { FUSIONREPORT_DETECT   } from '../../../modules/nf-core/fusionreport/detect/main'
 include { FUSIONREPORT_DOWNLOAD } from '../../../modules/nf-core/fusionreport/download/main'
 
 
 workflow BAM_GENE_FUSION {
-    
     take:
-    ch_star_junctions
+    ch_star_junctions //: Channel<Tuple<Map, Path>>
     ch_starfusion_ref
     ch_bam
+    ch_fastq
     ch_genome_fasta
     ch_genome_gtf
     arriba_blacklist
@@ -49,6 +50,21 @@ workflow BAM_GENE_FUSION {
         params.fusion_tools_cutoff
     )
 
+    ch_reads_fusions = ch_fastq
+        .map{ meta, reads -> [meta, reads] }
+        .join(FUSIONREPORT_DETECT.out.fusion_list
+              .map{meta, fusion_list -> [meta,[fusion_list]]})
+
+    ch_reads_fusions.view()
+
+    ch_starfusion_ref.view()
+    
+    FUSIONINSPECTOR(
+        ch_reads_fusions,
+        ch_starfusion_ref.map{fusion_ref ->
+            [[id: "starfusion_index"], fusion_ref]}
+    )
+
     
     emit:
     starfusion_fusions       = STARFUSION_DETECT.out.fusions
diff --git a/subworkflows/local/bam_gene_fusion/tests/main.nf.test b/subworkflows/local/bam_gene_fusion/tests/main.nf.test
index c90a068..83c1705 100644
--- a/subworkflows/local/bam_gene_fusion/tests/main.nf.test
+++ b/subworkflows/local/bam_gene_fusion/tests/main.nf.test
@@ -26,7 +26,6 @@ nextflow_workflow {
                     [ id:'minigenome_gtf' ],
                     file(params.nfcore_modules_testdata + 'genomics/homo_sapiens/genome/minigenome.gtf')
                 ]
-
                 input[2] = file(params.nfcore_modules_testdata + 'genomics/homo_sapiens/genome/CTAT_HumanFusionLib.mini.dat.gz')
                 input[3] = "homo_sapiens"
                 input[4] = file(params.nfcore_modules_testdata + 'genomics/homo_sapiens/genome/Pfam-A.hmm.gz')
@@ -43,7 +42,7 @@ nextflow_workflow {
         }
     }
 
-    test("Run starfusion with junctions") {
+    test("Run gene fusion subworkflow") {
         options "-stub"
         when{
             workflow {
@@ -59,18 +58,25 @@ nextflow_workflow {
                     file("${TEST_DATA_BASE}/fraser_module/bam/sample1.bam.bai")
                 ])
                 input[3] = Channel.of([
+                    [id:'sample1'],
+                    [
+                        file("sample1_1.fastq.gz"),
+                        file("sample1_2.fastq.gz")
+                    ]
+                ])
+                input[4] = Channel.of([
                     [ id:'minigenome_fasta' ],
                     file(params.nfcore_modules_testdata + 'genomics/homo_sapiens/genome/minigenome.fa'),
                     file(params.nfcore_modules_testdata + 'genomics/homo_sapiens/genome/minigenome.fa.fai')
                 ])
-                input[4] = Channel.of([
+                input[5] = Channel.of([
                     [ id:'minigenome_gtf' ],
                     file(params.nfcore_modules_testdata + 'genomics/homo_sapiens/genome/minigenome.gtf')
                 ])
-                input[5] = []
                 input[6] = []
                 input[7] = []
                 input[8] = []
+                input[9] = []
                 """
             }
         }
diff --git a/subworkflows/local/fastq_trim_filter_align_dedup/main.nf b/subworkflows/local/fastq_trim_filter_align_dedup/main.nf
index abbf412..61d5389 100644
--- a/subworkflows/local/fastq_trim_filter_align_dedup/main.nf
+++ b/subworkflows/local/fastq_trim_filter_align_dedup/main.nf
@@ -31,18 +31,17 @@ workflow FASTQ_TRIM_FILTER_ALIGN_DEDUP {
     ch_versions = ch_versions.mix(SORTMERNA_READS.out.versions.first())
 
     
-    
-    
-    STAR_ALIGN(
-        SORTMERNA_READS.out.reads.map {meta, reads ->
+    ch_fastq = SORTMERNA_READS.out.reads
+        .map {meta, reads ->
             def new_id = meta.id.split('_')[0]
             [meta + [id: new_id], reads]}
         .groupTuple()
-        .map{
-                meta, reads ->
-                def reads_flat = reads.flatten()
-                [meta, reads_flat]
-            },
+        .map { meta, reads ->
+              def reads_flat = reads.flatten()
+              [meta, reads_flat]}
+    
+    STAR_ALIGN(
+        ch_fastq,
         ch_star_index,
         ch_gtf,
         star_ignore_sjdbgtf,
@@ -91,6 +90,7 @@ workflow FASTQ_TRIM_FILTER_ALIGN_DEDUP {
     emit:
     trim_reads                   = TRIMGALORE.out.reads // channel: [ val(meta), path(fq.gz) ]
     trim_unpaired                = TRIMGALORE.out.unpaired // channel: [ val(meta), path(fq.gz) ]
+    ch_fastq                     = ch_fastq
     trim_html                    = TRIMGALORE.out.html // channel: [ val(meta), path(html) ]
     trim_zip                     = TRIMGALORE.out.zip // channel: [ val(meta), path(zip) ]
     trim_log                     = TRIMGALORE.out.log // channel: [ val(meta), path(txt) ]
diff --git a/workflows/dxnextflowrna.nf b/workflows/dxnextflowrna.nf
index a9cfb25..91b5e0e 100644
--- a/workflows/dxnextflowrna.nf
+++ b/workflows/dxnextflowrna.nf
@@ -158,11 +158,11 @@ workflow DXNEXTFLOWRNA {
     if (params.run_gene_fusion){
         ch_starfusion_ref = Channel.fromPath(params.starfusion_ref).collect()
         
-        
         BAM_GENE_FUSION(
             FASTQ_TRIM_FILTER_ALIGN_DEDUP.out.star_align_junction,
             ch_starfusion_ref,
             FASTQ_TRIM_FILTER_ALIGN_DEDUP.out.ch_bam_bai,
+            FASTQ_TRIM_FILTER_ALIGN_DEDUP.out.ch_fastq,
             ch_fasta_fai,
             ch_gtf,
             params.arriba_blacklist,

From 373682660471ac3bbd92284bf08d4d6792df5b42 Mon Sep 17 00:00:00 2001
From: Jorisvansteenbrugge
 <7196110+Jorisvansteenbrugge@users.noreply.github.com>
Date: Fri, 20 Feb 2026 11:56:31 +0100
Subject: [PATCH 2/5] test fusion vcf export

---
 conf/modules.config                           |  22 +
 modules/local/hgnc_download/main.nf           |  38 ++
 modules/local/vcf_collect/main.nf             |  46 ++
 .../resources/usr/bin/vcf_collect.py          | 622 ++++++++++++++++++
 subworkflows/local/bam_gene_fusion/main.nf    |  26 +-
 5 files changed, 749 insertions(+), 5 deletions(-)
 create mode 100644 modules/local/hgnc_download/main.nf
 create mode 100644 modules/local/vcf_collect/main.nf
 create mode 100755 modules/local/vcf_collect/resources/usr/bin/vcf_collect.py

diff --git a/conf/modules.config b/conf/modules.config
index 9fd1ae5..95dd3bc 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -411,4 +411,26 @@ process {
         ]
     }
 
+
+    withName: HGNC_DOWNLOAD {
+        cpus = 1
+        memory = { 1.GB }
+        time = { 5.m * task.attempt }
+    }
+
+    withName: VCF_COLLECT {
+        cpus = 2
+        memory = { 5.GB }
+        time = { 10.m * task.attempt }
+
+        publishDir = [
+            [
+                path: "${params.outdir}/fusions/vcf/",
+                mode: params.publish_dir_mode,
+                saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+            ]
+        ]
+    }
+    
+
 }
diff --git a/modules/local/hgnc_download/main.nf b/modules/local/hgnc_download/main.nf
new file mode 100644
index 0000000..ee14e2c
--- /dev/null
+++ b/modules/local/hgnc_download/main.nf
@@ -0,0 +1,38 @@
+process HGNC_DOWNLOAD {
+    tag "hgnc"
+    label 'process_low'
+
+    conda "bioconda::gnu-wget=1.18"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/3b/3b54fa9135194c72a18d00db6b399c03248103f87e43ca75e4b50d61179994b3/data' :
+        'community.wave.seqera.io/library/wget:1.21.4--8b0fcde81c17be5e' }"
+
+    output:
+    path "hgnc_complete_set.txt"        , emit: hgnc_ref
+    path "HGNC-DB-timestamp.txt"        , emit: hgnc_date
+    path "versions.yml"                 , emit: versions
+
+
+    script:
+    """
+    wget --no-check-certificate https://storage.googleapis.com/public-download-files/hgnc/tsv/tsv/hgnc_complete_set.txt
+    date +%Y-%m-%d/%H:%M  > HGNC-DB-timestamp.txt
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        wget: \$(wget --version | head -1 | cut -d ' ' -f 3)
+    END_VERSIONS
+    """
+
+    stub:
+    """
+    touch "hgnc_complete_set.txt"
+    touch "HGNC-DB-timestamp.txt"
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        wget: \$(wget --version | head -1 | cut -d ' ' -f 3)
+    END_VERSIONS
+    """
+
+}
diff --git a/modules/local/vcf_collect/main.nf b/modules/local/vcf_collect/main.nf
new file mode 100644
index 0000000..e43a117
--- /dev/null
+++ b/modules/local/vcf_collect/main.nf
@@ -0,0 +1,46 @@
+process VCF_COLLECT {
+    tag "$meta.id"
+    label 'process_single'
+
+    conda "conda-forge::pandas=1.5.2"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/pandas:1.5.2' :
+        'biocontainers/pandas:1.5.2' }"
+
+    input:
+    tuple val(meta), path(fusioninspector_tsv), path(fusioninspector_gtf_tsv), path(fusionreport_report), path(fusionreport_csv)
+    tuple val(meta2),  path(hgnc_ref)
+    tuple val(meta3),  path(hgnc_date)
+
+    output:
+    path "versions.yml"              , emit: versions
+    tuple val(meta), path("*vcf")    , emit: vcf
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    vcf_collect.py --fusioninspector $fusioninspector_tsv --fusionreport $fusionreport_report --fusioninspector_gtf $fusioninspector_gtf_tsv --fusionreport_csv $fusionreport_csv --hgnc $hgnc_ref --sample ${prefix} --out ${prefix}_fusion_data.vcf
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        python: \$(python --version | sed 's/Python //g')
+        vcf_collect: 0.1
+        HGNC DB retrieval: \$(cat $hgnc_date)
+    END_VERSIONS
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch ${prefix}_fusion_data.vcf
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        python: \$(python --version | sed 's/Python //g')
+        vcf_collect: 0.1
+    END_VERSIONS
+    """
+}
diff --git a/modules/local/vcf_collect/resources/usr/bin/vcf_collect.py b/modules/local/vcf_collect/resources/usr/bin/vcf_collect.py
new file mode 100755
index 0000000..292c052
--- /dev/null
+++ b/modules/local/vcf_collect/resources/usr/bin/vcf_collect.py
@@ -0,0 +1,622 @@
+#!/usr/bin/env python3
+
+# Author: Annick Renevey, annick.renevey@scilifelab.se
+# License: MIT
+#
+# This script is part of the rnafusion pipeline.
+# For full license and authorship information, see the repository README.
+#
+
+import argparse
+import logging
+import sys
+from pathlib import Path
+import pandas as pd
+import ast
+import numpy as np
+import csv
+
+logger = logging.getLogger()
+
+
+def vcf_collect(
+    fusioninspector_in_file: str,
+    fusionreport_in_file: str,
+    gtf: str,
+    fusionreport_csv: str,
+    hgnc: str,
+    sample: str,
+    out_file,
+) -> None:
+    """
+    Process FusionInspector and FusionReport data,
+    merge with GTF from FusionInspector and HGNC database,
+    and write a VCF file.
+
+    Args:
+        fusioninspector_in_file (str): Path to FusionInspector input file.
+        fusionreport_in_file (str): Path to Fusion-report input file.
+        sample (str): Sample name for the header.
+        hgnc (str): Path to HGNC file.
+        gtf (str): Path to output GTF file from FusionInspector in TSV format.
+        fusionreport_csv (str): Path to Fusion-report CSV output file.
+        out_file (str): Output VCF file path.
+
+    Adapted from: https://github.com/J35P312/MegaFusion
+    """
+    merged_df = (
+        build_fusioninspector_dataframe(fusioninspector_in_file)
+        .join(read_build_fusionreport(fusionreport_in_file), how="outer", on="FUSION")
+        .reset_index()
+    )
+    hgnc_df = build_hgnc_dataframe(hgnc)
+    df_symbol = merged_df[merged_df["Left_ensembl_gene_id"].isna()]
+    df_not_symbol = merged_df[merged_df["Left_ensembl_gene_id"].notna()]
+
+    df_not_symbol = hgnc_df.merge(
+        df_not_symbol,
+        how="right",
+        left_on="ensembl_gene_id",
+        right_on="Left_ensembl_gene_id",
+    )
+    df_symbol = hgnc_df.merge(
+        df_symbol, how="right", left_on="symbol", right_on="GeneA"
+    )
+    df = pd.concat([df_not_symbol, df_symbol])
+    df = df.rename(columns={"hgnc_id": "Left_hgnc_id"})
+
+    df_symbol = df[df["Right_ensembl_gene_id"].isna()]
+    df_not_symbol = df[df["Right_ensembl_gene_id"].notna()]
+
+    df_not_symbol = hgnc_df.merge(
+        df_not_symbol,
+        how="right",
+        left_on="ensembl_gene_id",
+        right_on="Right_ensembl_gene_id",
+    )
+    df_symbol = hgnc_df.merge(
+        df_symbol, how="right", left_on="symbol", right_on="GeneB"
+    )
+    df = pd.concat([df_not_symbol, df_symbol])
+    df = df.rename(columns={"hgnc_id": "Right_hgnc_id"})
+
+    gtf_df = build_gtf_dataframe(gtf)
+    all_df = df.merge(
+        gtf_df, how="left", left_on="CDS_LEFT_ID", right_on="Transcript_id"
+    )
+    all_df[["PosA", "orig_start", "orig_end"]] = (
+        all_df[["PosA", "orig_start", "orig_end"]].fillna(0).astype(int)
+    )
+
+    all_df = all_df[
+        (
+            (all_df["PosA"] >= all_df["orig_start"])
+            & (all_df["PosA"] <= all_df["orig_end"])
+        )
+        | ((all_df["orig_start"] == 0) & (all_df["orig_end"] == 0))
+    ]
+
+    all_df["Left_transcript_version"] = all_df["CDS_LEFT_ID"].astype(str).str.split(".").str[-1]
+
+    all_df.replace("", np.nan, inplace=True)
+    all_df = all_df.drop_duplicates()
+
+    all_df[["exon_number", "Left_transcript_version"]] = all_df[
+        ["exon_number", "Left_transcript_version"]
+    ].replace(0, np.nan)
+    # Fill non-empty values within each group for 'exon_number' and 'transcript_version'
+    all_df["exon_number"] = all_df.groupby("PosA")["exon_number"].transform(
+        lambda x: x.fillna(method="ffill").fillna(method="bfill")
+    )
+    all_df["Left_transcript_version"] = all_df.groupby("PosA")[
+        "Left_transcript_version"
+    ].transform(lambda x: x.fillna(method="ffill").fillna(method="bfill"))
+
+    all_df = all_df.rename(columns={"exon_number": "Left_exon_number"})
+    all_df = all_df[
+        [
+            "FUSION",
+            "GeneA",
+            "GeneB",
+            "PosA",
+            "PosB",
+            "ChromosomeA",
+            "ChromosomeB",
+            "TOOLS_HITS",
+            "SCORE",
+            "FOUND_DB",
+            "FOUND_IN",
+            "JunctionReadCount",
+            "SpanningFragCount",
+            "FFPM",
+            "PROT_FUSION_TYPE",
+            "CDS_LEFT_ID",
+            "CDS_RIGHT_ID",
+            "Left_transcript_version",
+            "Left_exon_number",
+            "Left_hgnc_id",
+            "Right_hgnc_id",
+            "Strand1",
+            "Strand2",
+            "annots",
+        ]
+    ].drop_duplicates()
+    all_df["CDS_RIGHT_ID"] = all_df["CDS_RIGHT_ID"].astype("str")
+    all_df = all_df.merge(
+        gtf_df, how="left", left_on="CDS_RIGHT_ID", right_on="Transcript_id"
+    )
+    all_df[["PosB", "orig_start", "orig_end"]] = all_df[
+        ["PosB", "orig_start", "orig_end"]
+    ].fillna(0)
+    all_df[["PosB", "orig_start", "orig_end"]] = all_df[
+        ["PosB", "orig_start", "orig_end"]
+    ].astype(int)
+    all_df = all_df[
+        (
+            (all_df["PosB"] >= all_df["orig_start"])
+            & (all_df["PosB"] <= all_df["orig_end"])
+        )
+        | ((all_df["orig_start"] == 0) & (all_df["orig_end"] == 0))
+    ]
+
+    all_df[["PosA", "PosB"]] = all_df[["PosA", "PosB"]].replace(0, np.nan)
+    all_df = all_df.replace("", np.nan)
+
+    all_df["Right_transcript_version"] = all_df["CDS_RIGHT_ID"].astype(str).str.split(".").str[-1]
+
+
+    all_df[["exon_number", "Right_transcript_version"]] = all_df[
+        ["exon_number", "Right_transcript_version"]
+    ].replace(0, np.nan)
+    # Fill non-empty values within each group for 'exon_number' and 'transcript_version'
+    all_df["exon_number"] = all_df.groupby("PosB")["exon_number"].transform(
+        lambda x: x.fillna(method="ffill").fillna(method="bfill")
+    )
+    all_df["Right_transcript_version"] = all_df.groupby("PosB")[
+        "Right_transcript_version"
+    ].transform(lambda x: x.fillna(method="ffill").fillna(method="bfill"))
+
+    all_df = all_df.rename(columns={"exon_number": "Right_exon_number"})
+
+    all_df = all_df[
+        [
+            "FUSION",
+            "GeneA",
+            "GeneB",
+            "PosA",
+            "PosB",
+            "ChromosomeA",
+            "ChromosomeB",
+            "TOOLS_HITS",
+            "SCORE",
+            "FOUND_DB",
+            "FOUND_IN",
+            "JunctionReadCount",
+            "SpanningFragCount",
+            "FFPM",
+            "PROT_FUSION_TYPE",
+            "CDS_LEFT_ID",
+            "CDS_RIGHT_ID",
+            "Left_transcript_version",
+            "Left_exon_number",
+            "Left_hgnc_id",
+            "Right_transcript_version",
+            "Right_exon_number",
+            "Right_hgnc_id",
+            "Strand1",
+            "Strand2",
+            "annots",
+        ]
+    ].drop_duplicates()
+    all_df = all_df.rename(columns={"FUSION": "Fusion"})
+    all_df = all_df.set_index("Fusion")
+
+    all_df = all_df.combine_first(read_fusionreport_csv(fusionreport_csv))
+
+    return write_vcf(column_manipulation(all_df), header_def(sample), out_file)
+
+
+def parse_args(argv=None):
+    """Parse the command line (``argv``, or ``sys.argv[1:]`` when None) and return the argparse namespace."""
+    parser = argparse.ArgumentParser(
+        description="Collect FusionInspector and fusion-report results into a single VCF file.",
+        epilog="Example: vcf_collect.py --fusioninspector fi.tsv --fusionreport index.html --fusioninspector_gtf fi_gtf.tsv --fusionreport_csv fusions.csv --hgnc hgnc.txt --sample S1 --out S1.vcf",
+    )
+    parser.add_argument(
+        "--fusioninspector",
+        metavar="FUSIONINSPECTOR",
+        type=Path,
+        help="FusionInspector output in TSV format.",
+    )
+    parser.add_argument(
+        "--fusionreport",
+        metavar="FUSIONREPORT",
+        type=Path,
+        help="Fusionreport output in index/html format.",
+    )
+    parser.add_argument(
+        "--fusionreport_csv",
+        metavar="FUSIONREPORT_CSV",
+        type=Path,
+        help="Fusionreport output in CSV format.",
+    )
+    parser.add_argument(
+        "--fusioninspector_gtf",
+        metavar="GTF",
+        type=Path,
+        help="FusionInspector GTF output.",
+    )
+    parser.add_argument(
+        "--hgnc",
+        metavar="HGNC",
+        type=Path,
+        help="HGNC database.",
+    )
+    parser.add_argument(
+        "--sample", metavar="SAMPLE", type=str, help="Sample name.", default="Sample"  # a name, not a path
+    )
+    parser.add_argument(
+        "--out",
+        metavar="OUT",
+        type=Path,
+        help="VCF output path.",
+    )
+    return parser.parse_args(argv)
+
+
+def header_def(sample: str) -> str:
+    """
+    Return the VCF header text: the ## meta lines followed by the #CHROM column line ending with ``sample``.
+    """
+    return '##fileformat=VCFv4.1\n\
+##ALT=<ID=BND,Description="Break end">\n\
+##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">\n\
+##INFO=<ID=CHRA,Number=1,Type=String,Description="Chromosome A">\n\
+##INFO=<ID=CHRB,Number=1,Type=String,Description="Chromosome B">\n\
+##INFO=<ID=GENEA,Number=.,Type=String,Description="Gene A">\n\
+##INFO=<ID=GENEB,Number=.,Type=String,Description="Gene B">\n\
+##INFO=<ID=POSA,Number=.,Type=String,Description="Breakpoint position A">\n\
+##INFO=<ID=POSB,Number=.,Type=String,Description="Breakpoint position B">\n\
+##INFO=<ID=ORIENTATION,Number=.,Type=String,Description="Strand1 and strand2 directions">\n\
+##INFO=<ID=FOUND_DB,Number=.,Type=String,Description="Databases in which the fusion has been found">\n\
+##INFO=<ID=FOUND_IN,Number=.,Type=String,Description="Callers that have found the fusion">\n\
+##INFO=<ID=TOOL_HITS,Number=.,Type=Integer,Description="Number of tools that found the fusion">\n\
+##INFO=<ID=SCORE,Number=.,Type=Float,Description="Score from fusionreport">\n\
+##INFO=<ID=FRAME_STATUS,Number=.,Type=String,Description="Frame status of the fusion">\n\
+##INFO=<ID=TRANSCRIPT_ID_A,Number=.,Type=String,Description="Transcript id A ">\n\
+##INFO=<ID=TRANSCRIPT_ID_B,Number=.,Type=String,Description="Transcript id B">\n\
+##INFO=<ID=TRANSCRIPT_VERSION_A,Number=.,Type=Float,Description="Transcript version A">\n\
+##INFO=<ID=TRANSCRIPT_VERSION_B,Number=.,Type=Float,Description="Transcript version B">\n\
+##INFO=<ID=HGNC_ID_A,Number=.,Type=Float,Description="HGNC id A">\n\
+##INFO=<ID=HGNC_ID_B,Number=.,Type=Float,Description="HGNC id B">\n\
+##INFO=<ID=EXON_NUMBER_A,Number=.,Type=Float,Description="Exon number A">\n\
+##INFO=<ID=EXON_NUMBER_B,Number=.,Type=Float,Description="Exon number B">\n\
+##INFO=<ID=ANNOTATIONS,Number=.,Type=String,Description="Annotations from FusionInspector">\n\
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n\
+##FORMAT=<ID=DV,Number=1,Type=Integer,Description="Number of paired-ends that support the event">\n\
+##FORMAT=<ID=RV,Number=1,Type=Integer,Description="Number of split reads that support the event">\n\
+##FORMAT=<ID=FFPM,Number=1,Type=Float,Description="Fusion fragments per million total RNA-seq fragments">\n\
+#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t{}'.format(
+        sample
+    )
+
+
+def convert_to_list(annots_str: str) -> list:
+    try:
+        return ast.literal_eval(annots_str)
+    except (SyntaxError, ValueError):
+        return np.nan
+
+
+def build_fusioninspector_dataframe(file: str) -> pd.DataFrame:
+    """
+    Read FusionInspector output from a tab-separated (TSV) file, preprocess the data, and set 'FUSION' as the index.
+    """
+    df = pd.read_csv(file, sep="\t")
+    df = df.rename(columns={"#FusionName": "FUSION"})
+    if not (df.empty):
+        df[["ChromosomeA", "PosA", "Strand1"]] = df["LeftBreakpoint"].str.split(
+            ":", expand=True
+        )
+        df[["ChromosomeB", "PosB", "Strand2"]] = df["RightBreakpoint"].str.split(
+            ":", expand=True
+        )
+        df[["LeftGeneName", "Left_ensembl_gene_id"]] = df["LeftGene"].str.split(
+            "^", expand=True
+        )
+        df[["RightGeneName", "Right_ensembl_gene_id"]] = df["RightGene"].str.split(
+            "^", expand=True
+        )
+        df["annots"] = (
+            df["annots"]
+            .apply(convert_to_list)
+            .apply(
+                lambda x: (
+                    ",".join(map(str, x))
+                    if isinstance(x, list)
+                    else str(x) if pd.notna(x) else ""
+                )
+            )
+        )
+    else:
+        for i in [
+            "ChromosomeA",
+            "Strand1",
+            "ChromosomeB",
+            "Strand2",
+            "LeftGeneName",
+            "Left_ensembl_gene_id",
+            "RightGeneName",
+            "Right_ensembl_gene_id",
+            "annots",
+        ]:
+            df[i] = ""
+        for j in [
+            "PosA",
+            "PosB",
+        ]:
+            df[j] = np.nan
+
+    return df.set_index(["FUSION"])
+
+
+def replace_value_with_column_name(
+    row: pd.Series, value_to_replace: str, column_name: str
+) -> str:
+    """
+    Replace a specific value in a row with the corresponding column name.
+    """
+    new_values = ""
+    for col_name, value in row.items():
+        if col_name == column_name:
+            if value == value_to_replace:
+                new_values = col_name
+            else:
+                new_values = ""
+    return new_values
+
+
+def concatenate_columns(row: pd.Series) -> str:
+    """
+    Concatenate non-empty values in a row into a single string separated by commas.
+    """
+    non_empty_values = [str(value) for value in row if value != ""]
+    return ",".join(non_empty_values)
+
+
+def read_build_fusionreport(fusionreport_file: str) -> pd.DataFrame:
+    """
+    Read and preprocess fusion-report data from a file, including handling missing tool columns,
+    getting the columns with each tool and create a new FOUND_IN column with all the tool hits.
+    Convert the list of databases in FOUND_DB into a joined string with a comma separator.
+    Make all column headers uppercase.
+    """
+    with open(fusionreport_file) as f:
+        from_html = [
+            line.split('rows": ')[1] for line in f if 'name="fusion_list' in line
+        ]
+        tmp = str(from_html)[2:]
+        tmp2 = tmp.split(', "tools": ')[0]
+        fusion_report = pd.DataFrame(ast.literal_eval(tmp2))
+    if not "arriba" in fusion_report.columns:
+        fusion_report["arriba"] = ""
+    if not "fusioncatcher" in fusion_report.columns:
+        fusion_report["fusioncatcher"] = ""
+    if not "starfusion" in fusion_report.columns:
+        fusion_report["starfusion"] = ""
+    fusion_report["arriba"] = fusion_report[["arriba"]].apply(
+        replace_value_with_column_name, args=("true", "arriba"), axis=1
+    )
+    fusion_report["fusioncatcher"] = fusion_report[["fusioncatcher"]].apply(
+        replace_value_with_column_name, args=("true", "fusioncatcher"), axis=1
+    )
+    fusion_report["starfusion"] = fusion_report[["starfusion"]].apply(
+        replace_value_with_column_name, args=("true", "starfusion"), axis=1
+    )
+    fusion_report["FOUND_IN"] = fusion_report[
+        ["arriba", "starfusion", "fusioncatcher"]
+    ].apply(concatenate_columns, axis=1)
+    fusion_report.columns = fusion_report.columns.str.upper()
+    fusion_report["FOUND_DB"] = fusion_report["FOUND_DB"].apply(
+        lambda x: ",".join(x) if len(x) > 0 else ""
+    )
+    fusion_report[["GeneA", "GeneB"]] = fusion_report["FUSION"].str.split(
+        "--", expand=True
+    )
+
+    return fusion_report[
+        ["FUSION", "GeneA", "GeneB", "TOOLS_HITS", "SCORE", "FOUND_DB", "FOUND_IN"]
+    ].set_index(["FUSION"])
+
+
+def read_fusionreport_csv(file: str) -> pd.DataFrame:
+    """Return gene names and breakpoint coordinates from the fusion-report CSV, indexed by 'Fusion'."""
+    df = pd.read_csv(file)
+    columns_to_iterate = ["starfusion", "arriba", "fusioncatcher"]
+    for column in columns_to_iterate:
+        if column not in df.columns:
+            df[column] = ""
+    df[columns_to_iterate] = df[columns_to_iterate].astype("str")
+    for column in ["ChromosomeA", "PosA", "StrandA", "ChromosomeB", "PosB", "StrandB"]:
+        if column not in df.columns:  # pre-create so the final selection works when no cell holds a '#'
+            df[column] = pd.Series(np.nan, index=df.index, dtype=object)
+    for index, row in df.iterrows():
+        for column in columns_to_iterate:
+            cell_value = row[column]
+            if "#" in cell_value:
+                df.at[index, column] = df.at[index, column].split(",")[0]
+                df.at[index, column] = df.at[index, column].replace("position: ", "")
+                df.at[index, "A"] = df.at[index, column].split("#")[0]
+                df.at[index, "B"] = df.at[index, column].split("#")[1]
+                df.at[index, "ChromosomeA"] = df.at[index, "A"].split(":")[0]
+                df.at[index, "PosA"] = df.at[index, "A"].split(":")[1]
+                if "+" in df.at[index, "A"] or "-" in df.at[index, "A"]:
+                    df.at[index, "StrandA"] = df.at[index, "A"].split(":")[2]
+                else:
+                    df.at[index, "StrandA"] = ""
+
+                df.at[index, "ChromosomeB"] = df.at[index, "B"].split(":")[0]
+                df.at[index, "PosB"] = df.at[index, "B"].split(":")[1]
+                if "+" in df.at[index, "B"] or "-" in df.at[index, "B"]:
+                    df.at[index, "StrandB"] = df.at[index, "B"].split(":")[2]
+                else:
+                    df.at[index, "StrandB"] = ""
+
+                break
+    df[["GeneA", "GeneB"]] = df["Fusion"].str.split("--", expand=True)
+    df = df.set_index("Fusion")
+    return df[
+        [
+            "GeneA",
+            "GeneB",
+            "ChromosomeA",
+            "PosA",
+            "StrandA",
+            "ChromosomeB",
+            "PosB",
+            "StrandB",
+        ]
+    ]
+
+
+def column_manipulation(df: pd.DataFrame) -> pd.DataFrame:
+    """
+    Manipulate and prepare DataFrame for VCF file creation.
+    """
+    df["ALT"] = ""
+    df = df.reset_index()
+    df["FORMAT"] = "GT:DV:RV:FFPM"
+    df["ID"] = "."
+    df["QUAL"] = "."
+    df["FILTER"] = "PASS"
+    df["REF"] = "N"
+    df["INFO"] = ""
+    df["Sample"] = ""
+    df["Strand1"] = df["Strand1"].astype(str)
+    df["JunctionReadCount"] = df["JunctionReadCount"].fillna(0).astype(int).astype(str)
+    df["SpanningFragCount"] = df["SpanningFragCount"].fillna(0).astype(int).astype(str)
+    df["FFPM"] = df["FFPM"].fillna(0).astype(float).astype(str)
+    df["ChromosomeA"] = df["ChromosomeA"].fillna(0).astype(str)
+    df["ChromosomeB"] = df["ChromosomeB"].fillna(0).astype(str)
+    df["Left_hgnc_id"] = df["Left_hgnc_id"].fillna(0).astype(int).astype(str)
+    df["Right_hgnc_id"] = df["Right_hgnc_id"].fillna(0).astype(int).astype(str)
+    df["Left_exon_number"] = df["Left_exon_number"].fillna(0).astype(int).astype(str)
+    df["Right_exon_number"] = df["Right_exon_number"].fillna(0).astype(int).astype(str)
+    df["Left_transcript_version"] = (
+        pd.to_numeric(df["Left_transcript_version"], errors="coerce").fillna(0).astype(int).astype(str)
+    )
+    df["Right_transcript_version"] = (
+        pd.to_numeric(df["Right_transcript_version"], errors="coerce").fillna(0).astype(int).astype(str)
+    )
+    df["PosA"] = df["PosA"].fillna(0).astype(int).astype(str)
+    df["PosB"] = df["PosB"].fillna(0).astype(int).astype(str)
+    df["PROT_FUSION_TYPE"] = df["PROT_FUSION_TYPE"].replace(".", "nan")
+    df["CDS_LEFT_ID"] = df["CDS_LEFT_ID"].replace(".", "nan")
+    df["CDS_RIGHT_ID"] = df["CDS_RIGHT_ID"].replace(".", "nan")
+
+    for index, row in df.iterrows():
+        if row["Strand1"] == "-" and row["Strand2"] == "-":
+            df.loc[index, "ALT"] = f'[{row["ChromosomeB"]}:{row["PosB"]}[N'
+        elif row["Strand1"] == "+" and row["Strand2"] == "-":
+            df.loc[index, "ALT"] = f'N]{row["ChromosomeB"]}:{row["PosB"]}]'
+        elif row["Strand1"] == "-" and row["Strand2"] == "+":
+            df.loc[index, "ALT"] = f'N]{row["ChromosomeB"]}:{row["PosB"]}]'
+        else:
+            df.loc[index, "ALT"] = f'N[{row["ChromosomeB"]}:{row["PosB"]}['
+
+        df.loc[index, "INFO"] = (
+            f"SVTYPE=BND;CHRA={row['ChromosomeA']};CHRB={row['ChromosomeB']};GENEA={row['GeneA']};GENEB={row['GeneB']};"
+            f"POSA={row['PosA']};POSB={row['PosB']};ORIENTATION={row['Strand1']},{row['Strand2']};FOUND_DB={row['FOUND_DB']};"
+            f"FOUND_IN={row['FOUND_IN']};TOOL_HITS={row['TOOLS_HITS']};SCORE={row['SCORE']};FRAME_STATUS={row['PROT_FUSION_TYPE']};"
+            f"TRANSCRIPT_ID_A={row['CDS_LEFT_ID']};TRANSCRIPT_ID_B={row['CDS_RIGHT_ID']};"
+            f"TRANSCRIPT_VERSION_A={row['Left_transcript_version']};TRANSCRIPT_VERSION_B={row['Right_transcript_version']};"
+            f"HGNC_ID_A={row['Left_hgnc_id']};HGNC_ID_B={row['Right_hgnc_id']};"
+            f"EXON_NUMBER_A={row['Left_exon_number']};EXON_NUMBER_B={row['Right_exon_number']};"
+            f"ANNOTATIONS={row['annots']}"
+        )
+        df.loc[index, "Sample"] = (
+            f"./1:{row['JunctionReadCount']}:{row['SpanningFragCount']}:{row['FFPM']}"
+        )
+
+    return df
+
+
+def write_vcf(df_to_print: pd.DataFrame, header: str, out_file: str) -> None:
+    """
+    Write a VCF file with a specified DataFrame, header, and output file path.
+    """
+    df_to_print[
+        [
+            "ChromosomeA",
+            "PosA",
+            "ID",
+            "REF",
+            "ALT",
+            "QUAL",
+            "FILTER",
+            "INFO",
+            "FORMAT",
+            "Sample",
+        ]
+    ].to_csv(
+        path_or_buf=out_file, sep="\t", header=None, index=False, quoting=csv.QUOTE_NONE
+    )
+
+    with open(out_file, "r+") as f:
+        content = f.read()
+        f.seek(0, 0)
+        f.write(header.rstrip("\r\n") + "\n" + content)
+
+
+def build_hgnc_dataframe(file: str) -> pd.DataFrame:
+    """
+    Build a DataFrame from the HGNC input file with the 'hgnc_id' ('HGNC:' prefix stripped), 'ensembl_gene_id' and 'symbol' columns, dropping rows where any of them is missing.
+    """
+    df = pd.read_csv(file, sep="\t", low_memory=False)
+    df["hgnc_id"] = df["hgnc_id"].str.replace("HGNC:", "")
+    return df[["hgnc_id", "ensembl_gene_id", "symbol"]].dropna()
+
+
+def build_gtf_dataframe(file: str) -> pd.DataFrame:
+    """
+    Build a DataFrame from GTF file converted in TSV, extracting relevant columns.
+    """
+    df = pd.read_csv(file, sep="\t")
+    df[["fusion_dump", "Transcript_id"]] = df["transcript_id"].str.split(
+        "^", expand=True
+    )
+    df[["orig_chromosome", "orig_start", "orig_end", "orig_dir"]] = df[
+        "orig_coord_info"
+    ].str.split(",", expand=True)
+    return df[
+        ["Transcript_id", "exon_number", "orig_start", "orig_end"]
+    ]
+
+
+def main(argv=None):
+    """Parse arguments, check that every input file exists (exit status 2 if not), then write the VCF."""
+    args = parse_args(argv)
+    inputs = {  # an omitted option (None) and a path that is not a regular file both count as missing
+        "--fusioninspector": args.fusioninspector,
+        "--fusionreport": args.fusionreport,
+        "--fusioninspector_gtf": args.fusioninspector_gtf,
+        "--fusionreport_csv": args.fusionreport_csv,
+        "--hgnc": args.hgnc,
+    }
+    missing = [f"{option} {path}" for option, path in inputs.items() if path is None or not path.is_file()]
+    if missing:
+        logger.error(f"The given input file(s) were not found: {', '.join(missing)}")
+        sys.exit(2)
+    vcf_collect(
+        args.fusioninspector,
+        args.fusionreport,
+        args.fusioninspector_gtf,
+        args.fusionreport_csv,
+        args.hgnc,
+        args.sample,
+        args.out,
+    )
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/subworkflows/local/bam_gene_fusion/main.nf b/subworkflows/local/bam_gene_fusion/main.nf
index b657937..75d1539 100644
--- a/subworkflows/local/bam_gene_fusion/main.nf
+++ b/subworkflows/local/bam_gene_fusion/main.nf
@@ -3,7 +3,8 @@ include { ARRIBA_ARRIBA         } from '../../../modules/nf-core/arriba/arriba/m
 include { FUSIONINSPECTOR       } from '../../../modules/nf-core/fusioninspector/main'
 include { FUSIONREPORT_DETECT   } from '../../../modules/nf-core/fusionreport/detect/main'
 include { FUSIONREPORT_DOWNLOAD } from '../../../modules/nf-core/fusionreport/download/main'
-
+include { VCF_COLLECT           } from '../../../modules/local/vcf_collect/main'
+include { HGNC_DOWNLOAD         } from '../../../modules/local/hgnc_download/main'
 
 workflow BAM_GENE_FUSION {
     take:
@@ -54,10 +55,6 @@ workflow BAM_GENE_FUSION {
         .map{ meta, reads -> [meta, reads] }
         .join(FUSIONREPORT_DETECT.out.fusion_list
               .map{meta, fusion_list -> [meta,[fusion_list]]})
-
-    ch_reads_fusions.view()
-
-    ch_starfusion_ref.view()
     
     FUSIONINSPECTOR(
         ch_reads_fusions,
@@ -65,6 +62,25 @@ workflow BAM_GENE_FUSION {
             [[id: "starfusion_index"], fusion_ref]}
     )
 
+    HGNC_DOWNLOAD()
+
+    FUSIONINSPECTOR.out.tsv
+            .join(FUSIONINSPECTOR.out.out_gtf)
+            .join(FUSIONREPORT_DETECT.out.report)
+        .join(FUSIONREPORT_DETECT.out.csv).view()
+
+    
+    VCF_COLLECT(
+        FUSIONINSPECTOR.out.tsv
+            .join(FUSIONINSPECTOR.out.out_gtf)
+            .join(FUSIONREPORT_DETECT.out.report)
+            .join(FUSIONREPORT_DETECT.out.csv),
+        HGNC_DOWNLOAD.out.hgnc_ref
+            .map{ hgnc_ref -> [[id: hgnc_ref.getSimpleName()], hgnc_ref]},
+        HGNC_DOWNLOAD.out.hgnc_date
+            .map{ hgnc_date -> [[id: hgnc_date.getSimpleName()], hgnc_date]}  
+    )
+
     
     emit:
     starfusion_fusions       = STARFUSION_DETECT.out.fusions

From 44df2a5280e22c89c396caf61392c80604270790 Mon Sep 17 00:00:00 2001
From: Jorisvansteenbrugge
 <7196110+Jorisvansteenbrugge@users.noreply.github.com>
Date: Fri, 20 Feb 2026 13:17:08 +0100
Subject: [PATCH 3/5] debug statements

---
 subworkflows/local/bam_gene_fusion/main.nf | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/subworkflows/local/bam_gene_fusion/main.nf b/subworkflows/local/bam_gene_fusion/main.nf
index 75d1539..3ae9669 100644
--- a/subworkflows/local/bam_gene_fusion/main.nf
+++ b/subworkflows/local/bam_gene_fusion/main.nf
@@ -64,10 +64,12 @@ workflow BAM_GENE_FUSION {
 
     HGNC_DOWNLOAD()
 
-    FUSIONINSPECTOR.out.tsv
-            .join(FUSIONINSPECTOR.out.out_gtf)
-            .join(FUSIONREPORT_DETECT.out.report)
-        .join(FUSIONREPORT_DETECT.out.csv).view()
+    FUSIONINSPECTOR.out.tsv.view()
+
+    
+       
+    FUSIONREPORT_DETECT.out.report.view()
+        
 
     
     VCF_COLLECT(

From 04e6c9e5f04170099f354ed96267c9afc62b5068 Mon Sep 17 00:00:00 2001
From: Jorisvansteenbrugge
 <7196110+Jorisvansteenbrugge@users.noreply.github.com>
Date: Fri, 20 Feb 2026 14:18:06 +0100
Subject: [PATCH 4/5] add agat gff2tsv

---
 modules.json                                  |  5 ++
 .../agat/convertspgff2tsv/environment.yml     |  5 ++
 modules/nf-core/agat/convertspgff2tsv/main.nf | 45 ++++++++++++++
 .../nf-core/agat/convertspgff2tsv/meta.yml    | 54 +++++++++++++++++
 .../agat/convertspgff2tsv/tests/main.nf.test  | 59 +++++++++++++++++++
 .../convertspgff2tsv/tests/main.nf.test.snap  | 48 +++++++++++++++
 6 files changed, 216 insertions(+)
 create mode 100644 modules/nf-core/agat/convertspgff2tsv/environment.yml
 create mode 100644 modules/nf-core/agat/convertspgff2tsv/main.nf
 create mode 100644 modules/nf-core/agat/convertspgff2tsv/meta.yml
 create mode 100644 modules/nf-core/agat/convertspgff2tsv/tests/main.nf.test
 create mode 100644 modules/nf-core/agat/convertspgff2tsv/tests/main.nf.test.snap

diff --git a/modules.json b/modules.json
index fcd52fc..f653380 100644
--- a/modules.json
+++ b/modules.json
@@ -5,6 +5,11 @@
         "https://github.com/nf-core/modules.git": {
             "modules": {
                 "nf-core": {
+                    "agat/convertspgff2tsv": {
+                        "branch": "master",
+                        "git_sha": "55ed5f4aabcdcd7a4cc44e1700f956756c7e8532",
+                        "installed_by": ["modules"]
+                    },
                     "arriba/arriba": {
                         "branch": "master",
                         "git_sha": "025a6f75e1f72e4ab60abb4bd65b3f289d4ad910",
diff --git a/modules/nf-core/agat/convertspgff2tsv/environment.yml b/modules/nf-core/agat/convertspgff2tsv/environment.yml
new file mode 100644
index 0000000..0e342bf
--- /dev/null
+++ b/modules/nf-core/agat/convertspgff2tsv/environment.yml
@@ -0,0 +1,5 @@
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - "bioconda::agat=1.5.1"
diff --git a/modules/nf-core/agat/convertspgff2tsv/main.nf b/modules/nf-core/agat/convertspgff2tsv/main.nf
new file mode 100644
index 0000000..7e7f946
--- /dev/null
+++ b/modules/nf-core/agat/convertspgff2tsv/main.nf
@@ -0,0 +1,45 @@
+process AGAT_CONVERTSPGFF2TSV {
+    tag "$meta.id"
+    label 'process_single'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/03/033434db0bd6ba28660401e1059286f36641fd8ce55faa11973fe5eaf312adcd/data' :
+        'community.wave.seqera.io/library/agat:1.5.1--ae3cd948ce5e9795' }"
+
+    input:
+    tuple val(meta), path(gff)
+
+    output:
+    tuple val(meta), path("*.tsv"), emit: tsv
+    path "versions.yml"           , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args   = task.ext.args   ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    agat_convert_sp_gff2tsv.pl \\
+        --gff ${gff} \\
+        --output ${prefix}.tsv \\
+        ${args}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        agat: \$(agat_convert_sp_gff2tsv.pl --help | sed -n 's/.*(AGAT) - Version: \\(.*\\) .*/\\1/p')
+    END_VERSIONS
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch ${prefix}.tsv
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        agat: \$(agat_convert_sp_gff2tsv.pl --help | sed -n 's/.*(AGAT) - Version: \\(.*\\) .*/\\1/p')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/agat/convertspgff2tsv/meta.yml b/modules/nf-core/agat/convertspgff2tsv/meta.yml
new file mode 100644
index 0000000..4c80ee8
--- /dev/null
+++ b/modules/nf-core/agat/convertspgff2tsv/meta.yml
@@ -0,0 +1,54 @@
+name: agat_convertspgff2tsv
+description: |
+  Converts a GFF/GTF file into a TSV file
+keywords:
+  - genome
+  - gff
+  - gtf
+  - conversion
+  - tsv
+tools:
+  - agat:
+      description: "AGAT is a toolkit for manipulation and getting information from
+        GFF/GTF files"
+      homepage: "https://github.com/NBISweden/AGAT"
+      documentation: "https://agat.readthedocs.io/"
+      tool_dev_url: "https://github.com/NBISweden/AGAT"
+      doi: "10.5281/zenodo.3552717"
+      licence: ["GPL v3"]
+      identifier: biotools:AGAT
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. [ id:'test', single_end:false ]
+    - gff:
+        type: file
+        description: Annotation file in GFF3/GTF format
+        pattern: "*.{gff,gtf}"
+        ontologies:
+          - edam: "http://edamontology.org/format_1975" # GFF3
+          - edam: "http://edamontology.org/format_2306" # GTF
+output:
+  tsv:
+    - - meta:
+          type: map
+          description: Groovy Map containing sample information
+      - "*.tsv":
+          type: file
+          description: Annotation file in TSV format
+          pattern: "*.tsv"
+          ontologies:
+            - edam: http://edamontology.org/format_3475 # TSV
+  versions:
+    - versions.yml:
+        type: file
+        description: File containing software versions
+        pattern: "versions.yml"
+        ontologies:
+          - edam: "http://edamontology.org/format_3750" # YAML
+authors:
+  - "@rannick"
+maintainers:
+  - "@gallvp"
diff --git a/modules/nf-core/agat/convertspgff2tsv/tests/main.nf.test b/modules/nf-core/agat/convertspgff2tsv/tests/main.nf.test
new file mode 100644
index 0000000..6a2e894
--- /dev/null
+++ b/modules/nf-core/agat/convertspgff2tsv/tests/main.nf.test
@@ -0,0 +1,59 @@
+nextflow_process {
+
+    name "Test Process AGAT_CONVERTSPGFF2TSV"
+    script "../main.nf"
+    process "AGAT_CONVERTSPGFF2TSV"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "agat"
+    tag "agat/convertspgff2tsv"
+
+    test("sarscov2 - genome [gff3]") {
+
+        when {
+            process {
+                """
+                input[0] = [
+                            [ id: 'test' ], // meta map
+                            file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3', checkIfExists: true)
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+
+    test("sarscov2 - genome [gff3] - stub") {
+
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = [
+                            [ id: 'test' ], // meta map
+                            file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3', checkIfExists: true)
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out.tsv.collect { file(it[1]).getName() } +
+                                process.out.versions).match() }
+            )
+        }
+
+    }
+
+}
diff --git a/modules/nf-core/agat/convertspgff2tsv/tests/main.nf.test.snap b/modules/nf-core/agat/convertspgff2tsv/tests/main.nf.test.snap
new file mode 100644
index 0000000..581e56f
--- /dev/null
+++ b/modules/nf-core/agat/convertspgff2tsv/tests/main.nf.test.snap
@@ -0,0 +1,48 @@
+{
+    "sarscov2 - genome [gff3] - stub": {
+        "content": [
+            [
+                "test.tsv",
+                "versions.yml:md5,e17c06a74b2cfa52ea1ef4703dae4ee3"
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.9.3",
+            "nextflow": "25.04.8"
+        },
+        "timestamp": "2025-10-17T11:13:08.280583"
+    },
+    "sarscov2 - genome [gff3]": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.tsv:md5,8373d2035689d23694f87606116cdccd"
+                    ]
+                ],
+                "1": [
+                    "versions.yml:md5,e17c06a74b2cfa52ea1ef4703dae4ee3"
+                ],
+                "tsv": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.tsv:md5,8373d2035689d23694f87606116cdccd"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,e17c06a74b2cfa52ea1ef4703dae4ee3"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.3",
+            "nextflow": "25.04.8"
+        },
+        "timestamp": "2025-10-17T11:13:02.775461"
+    }
+}
\ No newline at end of file

From 4b457345de2a8bdd829a141fba25b6ee026e4987 Mon Sep 17 00:00:00 2001
From: Jorisvansteenbrugge
 <7196110+Jorisvansteenbrugge@users.noreply.github.com>
Date: Fri, 20 Feb 2026 17:24:16 +0100
Subject: [PATCH 5/5] VCF collect export

---
 conf/modules.config                        |  3 ++-
 subworkflows/local/bam_gene_fusion/main.nf | 17 ++++++++---------
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/conf/modules.config b/conf/modules.config
index 95dd3bc..3261deb 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -55,7 +55,7 @@ process {
         ext.args = { params.fusioninspector_limitSjdbInsertNsj != 1000000 ? "--STAR_xtra_params \"--limitSjdbInsertNsj ${params.fusioninspector_limitSjdbInsertNsj}\"" : '' }
         ext.args2 = '--annotate --examine_coding_effect'
         publishDir = [
-            path: { "${params.outdir}/fusioninspector/${meta.id}" },
+            path: { "${params.outdir}/fusions/fusioninspector/${meta.id}" },
             mode: params.publish_dir_mode,
             saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
         ]
@@ -246,6 +246,7 @@ process {
     }
     withName: FUSIONREPORT_DETECT {
         ext.args = {'--no-cosmic'}
+        ext.args2 = "--export csv"
 
         publishDir     = [
             path: { "${params.outdir}/fusions/fusion_report/${meta.id}/" },
diff --git a/subworkflows/local/bam_gene_fusion/main.nf b/subworkflows/local/bam_gene_fusion/main.nf
index 3ae9669..ff0278f 100644
--- a/subworkflows/local/bam_gene_fusion/main.nf
+++ b/subworkflows/local/bam_gene_fusion/main.nf
@@ -1,4 +1,5 @@
 include { STARFUSION_DETECT     } from '../../../modules/nf-core/starfusion/detect/main'
+include { AGAT_CONVERTSPGFF2TSV } from '../../../modules/nf-core/agat/convertspgff2tsv/main'
 include { ARRIBA_ARRIBA         } from '../../../modules/nf-core/arriba/arriba/main'
 include { FUSIONINSPECTOR       } from '../../../modules/nf-core/fusioninspector/main'
 include { FUSIONREPORT_DETECT   } from '../../../modules/nf-core/fusionreport/detect/main'
@@ -64,17 +65,15 @@ workflow BAM_GENE_FUSION {
 
     HGNC_DOWNLOAD()
 
-    FUSIONINSPECTOR.out.tsv.view()
-
-    
-       
-    FUSIONREPORT_DETECT.out.report.view()
-        
-
+    AGAT_CONVERTSPGFF2TSV(
+        FUSIONINSPECTOR.out.out_gtf
+            .filter { _meta, file -> file.exists() && file.size() > 0 }
+    )
     
     VCF_COLLECT(
-        FUSIONINSPECTOR.out.tsv
-            .join(FUSIONINSPECTOR.out.out_gtf)
+        FUSIONINSPECTOR.out.abridged_tsv
+            .filter{ _meta, file -> file.exists() && file.size() > 0 }
+            .join(AGAT_CONVERTSPGFF2TSV.out.tsv)
             .join(FUSIONREPORT_DETECT.out.report)
             .join(FUSIONREPORT_DETECT.out.csv),
         HGNC_DOWNLOAD.out.hgnc_ref