Adding new rule to create filtered transcripts FASTA file.

skchronicles · skchronicles · commit 99eb7f00ed91 · 2025-06-24T15:39:50.000-04:00
diff --git a/workflow/Snakefile b/workflow/Snakefile
@@ -154,6 +154,18 @@ rule all:
             case=provided(case_groups, quantify_transcripts != "None"),
             control=provided(ctrl_groups, quantify_transcripts != "None"),
         ),
+        # Create filtered transcripts FASTA file,
+        # conditionally runs if the quantify
+        # transcripts option is provided and
+        # there are groups/contrasts.
+        # @imported from rules/isoformswitchanalyzer.smk
+        # @output of rule isoformswitchanalyzer_isoformfasta
+        expand(
+            join(workpath, "differential_switching", batch_id, "{case}_vs_{control}", "{case}-{control}_top_isoform_switches.fa"),
+            zip,
+            case=provided(case_groups, quantify_transcripts != "None"),
+            control=provided(ctrl_groups, quantify_transcripts != "None"),
+        ),
 
 
 # Import rules
diff --git a/workflow/rules/isoformswitchanalyzer.smk b/workflow/rules/isoformswitchanalyzer.smk
@@ -311,7 +311,6 @@ rule isoformswitchanalyzer_diffswitching:
         vec = join(workpath, "differential_switching", batch_id, "{case}_vs_{control}", "sample_vector.tsv"),
     output:
         swt = join(workpath, "differential_switching", batch_id, "{case}_vs_{control}", "{case}-{control}_top_isoform_switches.tsv"),
-
     params:
         rname  = "diffswitch",
         outdir = join(workpath, "differential_switching", batch_id, "{case}_vs_{control}"),
@@ -340,3 +339,45 @@ rule isoformswitchanalyzer_diffswitching:
         --control_group {wildcards.control} \\
         --method saturn
     """
+
+
+rule isoformswitchanalyzer_isoformfasta:
+    """
+    Data-processing step to write isoform sequences based on the
+    IsoformSwitchAnalyzeR results. This rule will create a FASTA
+    file containing the sequence of each transcript of a gene
+    with a significant isoform switch. So if a gene contains N
+    transcripts and it had a significant switching event (based
+    on the fdr_filter), then the transcript sequences of all
+    N transcripts will be written to the FASTA file.
+    @Input:
+        Differential isoform switching results (indirect-gather-per-contrast),
+        Splicing annotation file
+    @Output:
+        Filtered transcripts FASTA file (top isoform switches)
+    """
+    input:
+        swt = join(workpath, "differential_switching", batch_id, "{case}_vs_{control}", "{case}-{control}_top_isoform_switches.tsv"),
+        spl = join(workpath, "temp", "splicing_annotation.tsv"),
+    output:
+        fa  = join(workpath, "differential_switching", batch_id, "{case}_vs_{control}", "{case}-{control}_top_isoform_switches.fa"),
+    params:
+        rname  = "isofasta",
+        pyscript    = join(workpath, "workflow", "scripts", "isoform_sequences.py"),
+        transcripts = quantify_transcripts,
+        fdr_filter  = 0.1,
+    resources:
+        mem   = allocated("mem",  "isoformswitchanalyzer_isoformfasta", cluster),
+        time  = allocated("time", "isoformswitchanalyzer_isoformfasta", cluster),
+    threads: int(allocated("threads", "isoformswitchanalyzer_isoformfasta", cluster))
+    container: config["images"]["isoformswitchanalyzer"]
+    shell: """
+    # Create the filtered transcripts FASTA file
+    {params.pyscript} \\
+        -i {input.swt} \\
+        -s {input.spl} \\
+        -t {params.transcripts} \\
+        -o {output.fa} \\
+        -f {params.fdr_filter} \\
+        -d '|'
+    """
diff --git a/workflow/rules/leafcutter.smk b/workflow/rules/leafcutter.smk
@@ -18,7 +18,9 @@ rule leafcutter_gtf2exons:
     @Input:
         Input GTF file (singleton)
     @Output:
-        Exons TSV file
+        Exons TSV file,
+        Exon annotation file,
+        Splicing annotation file
     """
     input:
         gtf   = gtf_file
@@ -403,9 +405,11 @@ rule leafcutter_prepleafviz:
         Cluster significant table output by leafcutter_ds.R,
         Per-junction effect sizes table output by leafcutter_ds.R,
         All introns TSV file,
-
+        Splicing annotation file
     @Output:
-        Rdata file to input to leafviz shiny app
+        Rdata file to input to leafviz shiny app,
+        Intron annotation file,
+        Merged and annotated leafcutter results
     """
     input:
         num = join(workpath, "junctions", "leafcutter_perind_numers.counts.gz"),