From a26dddab5bf2753d48016e1e155c6a4ad20b4f4f Mon Sep 17 00:00:00 2001
From: mikemartinez99 <mike.j.martinez99@gmail.com>
Date: Fri, 17 Apr 2026 14:10:18 -0400
Subject: [PATCH 01/11] Ribodetector and RustQC additions

- Implemented ribodetector rules and associated conda environment
- Implemented RustQC and added singularity requirement in job script to run it.
- Updated multiqc config to support branding to CQB and new QC report
- IMplemented basic QC report (what we previously had) and detailed QC report
---
 Snakefile                    | 356 +++++++++++++++++++++++------------
 env_config/pcaplot.yaml      |   3 +-
 env_config/ribodetector.yaml |  10 +
 job.script.sh                |  27 +--
 multiqc_config.yaml          |  23 ++-
 scripts/ribo_stats.sh        |  79 ++++++++
 6 files changed, 353 insertions(+), 145 deletions(-)
 create mode 100644 env_config/ribodetector.yaml
 create mode 100644 scripts/ribo_stats.sh

diff --git a/Snakefile b/Snakefile
index cce1870..8f7c9dc 100644
--- a/Snakefile
+++ b/Snakefile
@@ -4,14 +4,22 @@
 #####~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 import pandas as pd
 
-# set config file
+#----- set config file
 configfile: "config.yaml"
+print(config)
 
-# read in sample data
+#----- read in sample data
 samples_df = pd.read_csv(config["sample_csv"]).set_index("sample_id", drop=False)
 sample_list = list(samples_df['sample_id'])
 
-print(config)
+#----- Set additional rules based on params
+REMOVE_rRNA = config.get("remove_rRNA", False)
+if REMOVE_rRNA:
+    R1_FASTQ_INPUT = "ribodetector/{sample}/{sample}.nonrrna.1.fq.gz" 
+    R2_FASTQ_INPUT = "ribodetector/{sample}/{sample}.nonrrna.2.fq.gz" if config["layout"]=="paired" else None
+else:
+    R1_FASTQ_INPUT = "trimming/{sample}.R1.trim.fastq.gz"
+    R2_FASTQ_INPUT = "trimming/{sample}.R2.trim.fastq.gz" if config["layout"]=="paired" else None
 
 #####~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 # define rules
@@ -22,10 +30,12 @@ rule all:
         expand("trimming/{sample}.R1.trim.fastq.gz", sample=sample_list),
         expand("trimming/{sample}.R2.trim.fastq.gz", sample=sample_list) if config["layout"] == "paired" else [],
         expand("trimming/{sample}.cutadapt.report", sample=sample_list),
+        expand("ribodetector/{sample}/{sample}.nonrrna.1.fq.gz", sample=sample_list) if REMOVE_rRNA else [],
+        expand("ribodetector/{sample}/{sample}.nonrrna.2.fq.gz", sample=sample_list) if REMOVE_rRNA and config["layout"] == "paired" else [],
+        "ribodetector/rrna_norrna_pct_mqc.tsv" if REMOVE_rRNA else [],
         expand("alignment/{sample}.srt.bam", sample=sample_list),
         expand("alignment/{sample}.srt.bam.bai", sample=sample_list),
         expand("alignment/{sample}.srt.bam", sample=sample_list), #commenting out until condition for STAR exists
-        expand("alignment/stats/{sample}.srt.bam.flagstat", sample=sample_list),
         expand("markdup/{sample}.mkdup.bam", sample=sample_list),
         expand("metrics/picard/{sample}.picard.rna.metrics.txt", sample=sample_list),
         expand("rsem/{sample}.genes.results", sample=sample_list) if config["run_rsem"] == "yes" else [],
@@ -40,24 +50,33 @@ rule all:
         "featurecounts/featurecounts.readcounts_rpkm.ann.tsv",
         "featurecounts/featurecounts.readcounts_fpkm.tsv",
         "featurecounts/featurecounts.readcounts_fpkm.ann.tsv",
+        expand("qc/{sample}", sample=sample_list)
     conda:
         "env_config/multiqc.yaml",
-    resources: cpus="10", maxtime="2:00:00", mem_mb=60000,
-
+    resources: cpus="10", maxtime="2:00:00", mem_mb="60gb",
     params:
         layout=config["layout"],
         multiqc=config["multiqc_path"],
         run_rsem=config["run_rsem"],
         aligner_name=config["aligner_name"],
-
     output:
-        "multiqc_report.html"
-
+        detailed_qc = "multiqc_report_detailed.html",
+        basic_qc = "multiqc_report_basic.html"
     shell: """
-        #multiqc fastqc alignment markdup metrics featurecounts
-        {params.multiqc} -p  alignment markdup metrics featurecounts
+        #-----multiqc fastqc alignment markdup metrics featurecounts
+        {params.multiqc} \
+            -c multiqc_config.yaml \
+            -p \
+            qc alignment markdup metrics featurecounts ribodetector \
+            -n {output.detailed_qc}
+        
+        {params.multiqc} \
+            -c multiqc_config.yaml \
+            -p \
+            qc/*/samtools qc/*/preseq alignment markdup metrics featurecounts ribodetector \
+            -n {output.basic_qc}
 
-        #remove dummy R2 file (created to meet input rule requirements for rule all:)
+        #-----remove dummy R2 file (created to meet input rule requirements for rule all:)
         # also remove dummy rpkm and fpkm files from featurecounts normalization
         if [ "{params.layout}" = "single" ]
           then
@@ -69,8 +88,7 @@ rule all:
             rm -f featurecounts/featurecounts.readcounts_rpkm.ann.tsv
         fi
 
-
-        #remove dummy alignment files (created to meet input rule requirements for rule all:)
+        #-----remove dummy alignment files (created to meet input rule requirements for rule all:)
         if [ "{params.aligner_name}" = "hisat" ]
           then
             rm -rf alignment/*.Aligned.toTranscriptome.out.bam
@@ -78,12 +96,14 @@ rule all:
 
 """
 
-
 rule trimming:
+    """
+    Read trimming
+    """
     output: 
-        "trimming/{sample}.R1.trim.fastq.gz",
-        "trimming/{sample}.R2.trim.fastq.gz" if config["layout"]=="paired" else [],
-        "trimming/{sample}.cutadapt.report"
+        trim_R1 = "trimming/{sample}.R1.trim.fastq.gz",
+        trim_R2 = "trimming/{sample}.R2.trim.fastq.gz" if config["layout"]=="paired" else [],
+        trim_log = "trimming/{sample}.cutadapt.report"
     params:
         sample = lambda wildcards:  wildcards.sample,
         cutadapt = config["cutadapt_path"],
@@ -93,41 +113,100 @@ rule trimming:
         nextseq_flag = config["cutadapt_nextseq_flag"]
     conda:
         "env_config/cutadapt.yaml",
-    resources: cpus="10", maxtime="2:00:00", mem_mb=60000,
-
+    resources: cpus="10", maxtime="2:00:00", mem_mb="60gb",
+    message: "Trimming {wildcards.sample} reads with cutadapt."
     shell: """
         if  [ "{params.layout}" == "paired" ] 
         then
             {params.cutadapt} \
-                -o trimming/{params.sample}.R1.trim.fastq.gz \
-                -p trimming/{params.sample}.R2.trim.fastq.gz \
+                -o {output.trim_R1} \
+                -p {output.trim_R2} \
                 {params.fastq_file_1} \
                 {params.fastq_file_2} \
                 -m 1 \
                 {params.nextseq_flag} \
                 -j {resources.cpus} \
                 --max-n 0.8 \
-                --trim-n > trimming/{params.sample}.cutadapt.report
+                --trim-n > {output.trim_log}
         else
             {params.cutadapt} \
-                -o trimming/{params.sample}.R1.trim.fastq.gz \
+                -o {output.trim_R1} \
                 {params.fastq_file_1} \
                 -m 1 \
                 {params.nextseq_flag} \
                 -j {resources.cpus} \
                 --max-n 0.8 \
-                --trim-n > trimming/{params.sample}.cutadapt.report
+                --trim-n > {output.trim_log}
         fi
 
     """
 
+rule ribodetector:
+    """
+    Ribosomal RNA filtering
+    """
+    input:
+        trimmed_read_1 = "trimming/{sample}.R1.trim.fastq.gz",
+        trimmed_read_2 = "trimming/{sample}.R2.trim.fastq.gz" if config["layout"]=="paired" else []
+    output:
+        filtered_read_1 = "ribodetector/{sample}/{sample}.nonrrna.1.fq.gz",
+        rrna_reads_1 = "ribodetector/{sample}/{sample}.rrna.1.fq.gz",
+        filtered_read_2 = "ribodetector/{sample}/{sample}.nonrrna.2.fq.gz" if config["layout"]=="paired" else [],
+        rrna_reads_2 = "ribodetector/{sample}/{sample}.rrna.2.fq.gz" if config["layout"]=="paired" else []
+    resources: cpus="10", maxtime="3:00:00", mem_mb="300gb"
+    message: "Removing rRNA sequences for {wildcards.sample} reads with ribodetector."
+    conda:
+    	"env_config/ribodetector.yaml"
+    params:
+        sample = lambda wildcards: wildcards.sample,
+        read_length = config["read_length"],
+        layout = config["layout"],
+        filtering_method = "norrna" if config["layout"]=="paired" else "none"
+    log:
+        stdout = "ribodetector/{sample}/stdout.log",
+        stderr = "ribodetector/{sample}/stderr.log"
+    shell: """
+        if [ "{params.layout}" = "paired" ]; then
+            ribodetector_cpu -t {resources.cpus} \
+                -l {params.read_length} \
+                -i {input.trimmed_read_1} {input.trimmed_read_2} \
+                -e norrna \
+                -o {output.filtered_read_1} {output.filtered_read_2} \
+                -r {output.rrna_reads_1} {output.rrna_reads_2} \
+                > {log.stdout} 2> {log.stderr}
+        else
+            ribodetector_cpu -t {resources.cpus} \
+                -l {params.read_length} \
+                -i {input.trimmed_read_1} \
+                -e norrna \
+                -o {output.filtered_read_1} \
+                -r {output.rrna_reads_1} \
+                > {log.stdout} 2> {log.stderr}
+        fi
+    """
 
-
-
-
-
+rule ribodetector_mqc_summary:
+    """
+    Generate ribodetector multiqc content
+    """
+    input:
+        expand("ribodetector/{sample}/stderr.log", sample=sample_list)
+    output:
+        "ribodetector/rrna_norrna_pct_mqc.tsv"
+    params:
+        script = "scripts/ribodetector_mqc.sh",
+        ribo_dir = "ribodetector"
+    resources: cpus="1", maxtime="10:00", mem_mb="2gb"
+    message: "Generating ribodetector report."
+    shell: """
+        bash scripts/ribo_stats.sh ribodetector
+    """
+  
 if config["aligner_name"]=="star":
   rule pre_alignment:
+    """
+    Generate aligner index and verify
+    """
       output: "alignment/index_status.txt",
       params: 
           layout = config["layout"],
@@ -137,9 +216,8 @@ if config["aligner_name"]=="star":
           samtools = config["samtools_path"],
       conda:
           "env_config/alignment.yaml",
-
-      resources: cpus="10", maxtime="8:00:00", mem_mb=120000,
-
+      resources: cpus="10", maxtime="8:00:00", mem_mb="120gb",
+      message: "Checking STAR alignment index"
       shell: """
         align_folder="sample_ref/STAR_index"
         if [ ! -d "{params.aligner_index}" ]
@@ -161,14 +239,15 @@ if config["aligner_name"]=="star":
       """
 
   rule alignment:
-      input: "trimming/{sample}.R1.trim.fastq.gz",
-             "trimming/{sample}.R2.trim.fastq.gz" if config["layout"] == "paired" else [],
+    """
+    STAR alignment
+    """
+      input: R1_FASTQ_INPUT, 
+             R2_FASTQ_INPUT if config["layout"] == "paired" else [],
              "alignment/index_status.txt",
-
       output: "alignment/{sample}.srt.bam",
               "alignment/{sample}.srt.bam.bai",
               "alignment/{sample}.Aligned.toTranscriptome.out.bam",
-
       params:
           layout = config["layout"],
           sample = lambda wildcards:  wildcards.sample,
@@ -176,12 +255,11 @@ if config["aligner_name"]=="star":
           aligner = config["aligner_path"],
           aligner_index = config["aligner_index"],
           samtools = config["samtools_path"],
-          readFilesIn = "trimming/{sample}.R1.trim.fastq.gz" + " trimming/{sample}.R2.trim.fastq.gz" if config["layout"] == "paired" else 'trimming/{sample}.R1.trim.fastq.gz'
+          readFilesIn = R1_FASTQ_INPUT + (f" {R2_FASTQ_INPUT}" if config["layout"] == "paired" else '')
       conda:
           "env_config/alignment.yaml",
-
-      resources: cpus="5", maxtime="8:00:00", mem_mb=100000,
-
+      resources: cpus="5", maxtime="8:00:00", mem_mb="100gb",
+      message: "aligning {wildcards.sample} reads with STAR."
       shell: """
         align_folder=`cat alignment/index_status.txt`
                 {params.aligner} \
@@ -205,23 +283,22 @@ if config["aligner_name"]=="star":
                     --readFilesCommand zcat \
                     --outFileNamePrefix alignment/{params.sample}.
         
-# rename output BAM
+        #----- rename output BAM
         mv alignment/{params.sample}.Aligned.sortedByCoord.out.bam alignment/{params.sample}.srt.bam
         
-        # index BAM
+        #----- index BAM
         {params.samtools} index -@ 4 alignment/{params.sample}.srt.bam
      """
 
-
-
 if config["aligner_name"]=="hisat":
   rule alignment:
+    """
+    Hisat2 alignment
+    """
       input: "trimming/{sample}.R1.trim.fastq.gz",
              "trimming/{sample}.R2.trim.fastq.gz" if config["layout"]=="paired" else [],
-
       output: "alignment/{sample}.srt.bam",
               "alignment/{sample}.srt.bam.bai",
-
       params:
           layout = config["layout"],
           sample = lambda wildcards:  wildcards.sample,
@@ -230,71 +307,80 @@ if config["aligner_name"]=="hisat":
           aligner_index = config["aligner_index"],
           samtools = config["samtools_path"],
           fastq_1_flag = '-1' if config['layout']=='paired' else '-U',
-          fastq_2 = '-2 trimming/{sample}.R2.trim.fastq.gz'  if config['layout']=='paired' else '',
-          
+          fastq_2 = '-2 trimming/{sample}.R2.trim.fastq.gz'  if config['layout']=='paired' else '',   
       conda:
           "env_config/alignment.yaml",
-
-      resources: cpus="4", maxtime="8:00:00", mem_mb=40000,
-
+      resources: cpus="4", maxtime="8:00:00", mem_mb="40gb",
+      message: "aligning {wildcards.sample} reads with hisat2."
       shell: """
           {params.hisat2} \
             -x {params.aligner_index} \
             --rg ID:{params.sample} \
             --rg SM:{params.sample} \
             --rg LB:{params.sample}  \
-            {params.fastq_1_flag} trimming/{params.sample}.R1.trim.fastq.gz \
+            {params.fastq_1_flag} \
+            trimming/{params.sample}.R1.trim.fastq.gz \
             {params.fastq_2}  \
             -p {resources.cpus}  \
             --summary-file alignment/{params.sample}.hisat.summary.txt | \
             {params.samtools} view -@ {resources.cpus} -b | \
             {params.samtools} sort -T /scratch/samtools_{params.sample} -@ {resources.cpus} -m 128M - 1> alignment/{params.sample}.srt.bam
 
-        # generate BAM index
+        #----- generate BAM index
         {params.samtools} index -@ {resources.cpus} alignment/{params.sample}.srt.bam
 
      """
 
-
-
-rule alignment_metrics:
-    input: "alignment/{sample}.srt.bam",
-
-    output: "alignment/stats/{sample}.srt.bam.flagstat",
-            "alignment/stats/{sample}.srt.bam.idxstats",
-
+rule rustqc:
+    """
+    RNA-seq QC with RustQC (containerized)
+    """
+    input:
+        bam = "markdup/{sample}.mkdup.bam"
+    output:
+        qc_dir = directory("qc/{sample}")
+    container: "docker://ghcr.io/seqeralabs/rustqc:latest"
     params:
-        samtools = config["samtools_path"],
-        sample = lambda wildcards:  wildcards.sample,
-    conda:
-        "env_config/samtools.yaml",
-
-    resources: cpus="2", maxtime="8:00:00", mem_mb=20000,
-
+        gtf = config["annotation_gtf"],
+        paired_flag = '-p' if config['layout']=='paired' else ''
+    threads: 4
+    resources: cpus="4", maxtime="8:00:00", mem_mb="40gb",
+    message: "Running comprehensive QC for {wildcards.sample} with RustQC."
+    log: "qc/{sample}/{sample}.log"
     shell: """
-            {params.samtools} flagstat alignment/{params.sample}.srt.bam > alignment/stats/{params.sample}.srt.bam.flagstat
-            {params.samtools} idxstats alignment/{params.sample}.srt.bam > alignment/stats/{params.sample}.srt.bam.idxstats
-           """
-
-rule picard_markdup:
-    input: "alignment/{sample}.srt.bam",
+    
+        rustqc rna \
+            {input.bam} \
+            --gtf {params.gtf} \
+            {params.paired_flag} \
+            -o {output.qc_dir} 2> {log} &&
+        
+        rm -r {output.qc_dir}/featurecounts
 
-    output: "markdup/{sample}.mkdup.bam",
+    """
 
+rule picard_markdup:
+    """
+    Deduplication
+    """
+    input: 
+        sorted_bam = "alignment/{sample}.srt.bam",
+    output: 
+        mkdups = "markdup/{sample}.mkdup.bam",
+        picard_log = "markdup/{sample}.mkdup.log.txt"
     params:
         sample = lambda wildcards:  wildcards.sample,
         picard = config['picard_path'],
     conda:
         "env_config/picard.yaml",
-
-    resources: cpus="2", maxtime="30:00", mem_mb=20000,
-
+    resources: cpus="2", maxtime="30:00", mem_mb="20gb",
+    message: "Deduplicating reads for {wildcards.sample} reads with Picard."
     shell: """
             {params.picard} -Xmx2G -Xms2G  \
                  MarkDuplicates \
-                I=alignment/{params.sample}.srt.bam \
-                O=markdup/{params.sample}.mkdup.bam \
-                M=markdup/{params.sample}.mkdup.log.txt \
+                I={input.sorted_bam} \
+                O={output.mkdups} \
+                M={output.picard_log} \
                 OPTICAL_DUPLICATE_PIXEL_DISTANCE=100 \
                 CREATE_INDEX=false  \
                 MAX_RECORDS_IN_RAM=4000000 \
@@ -303,10 +389,13 @@ rule picard_markdup:
 """
 
 rule picard_collectmetrics:
-    input: "markdup/{sample}.mkdup.bam",
-
-    output: "metrics/picard/{sample}.picard.rna.metrics.txt",
-
+    """
+    Picard metrics
+    """
+    input: 
+        mkdup_bam = "markdup/{sample}.mkdup.bam",
+    output: 
+        picard_metrics = "metrics/picard/{sample}.picard.rna.metrics.txt",
     params:
         sample = lambda wildcards:  wildcards.sample,
         picard = config['picard_path'],
@@ -315,24 +404,28 @@ rule picard_collectmetrics:
         strand = config['picard_strand'],
     conda:
         "env_config/picard.yaml",
-
-    resources: cpus="2", maxtime="8:00:00", mem_mb=20000,
-
+    resources: cpus="2", maxtime="8:00:00", mem_mb="20gb",
+    message: "Collecting RNASeq metrics for {wildcards.sample} reads with Picard."
     shell: """
         {params.picard} -Xmx2G -Xms2G \
-             CollectRnaSeqMetrics \
-            I=markdup/{params.sample}.mkdup.bam \
-            O=metrics/picard/{params.sample}.picard.rna.metrics.txt \
-            REF_FLAT={params.flatref} STRAND={params.strand} \
+            CollectRnaSeqMetrics \
+            I={input.mkdup_bam} \
+            O={output.picard_metrics} \
+            REF_FLAT={params.flatref} \
+            STRAND={params.strand} \
             RIBOSOMAL_INTERVALS={params.rrna_list} \
             MAX_RECORDS_IN_RAM=1000000
 """
 
 rule rsem:
-    input:  "alignment/{sample}.srt.bam",
-
-    output: "rsem/{sample}.genes.results",
-            "rsem/{sample}.isoforms.results"
+    """
+    Isoform counting
+    """
+    input:  
+        "alignment/{sample}.srt.bam",
+    output: 
+        "rsem/{sample}.genes.results",
+        "rsem/{sample}.isoforms.results"
     params:
         sample = lambda wildcards:  wildcards.sample,
         rsem_calc_exp_path = config['rsem_calc_exp_path'],
@@ -341,8 +434,8 @@ rule rsem:
         rsem_paired_flag = '--paired-end' if config["layout"]=='paired' else '',
     conda:
         "env_config/rsem.yaml",
-    resources: cpus="10", maxtime="8:00:00", mem_mb=60000,
-
+    resources: cpus="10", maxtime="8:00:00", mem_mb="60gb",
+    message: "Counting transcript isoforms for {wildcards.sample} reads with RSEM."
     shell: """   
         {params.rsem_calc_exp_path} \
           {params.rsem_paired_flag} \
@@ -356,16 +449,20 @@ rule rsem:
  """
 
 rule featurecounts:
-    input:  expand("alignment/{sample}.srt.bam", sample=sample_list),
-
-    output: "featurecounts/featurecounts.readcounts.tsv",
-            "featurecounts/featurecounts.readcounts.ann.tsv",
-            "featurecounts/featurecounts.readcounts_tpm.tsv",
-            "featurecounts/featurecounts.readcounts_tpm.ann.tsv",
-            "featurecounts/featurecounts.readcounts_rpkm.tsv",
-            "featurecounts/featurecounts.readcounts_rpkm.ann.tsv",
-            "featurecounts/featurecounts.readcounts_fpkm.tsv",
-            "featurecounts/featurecounts.readcounts_fpkm.ann.tsv",
+    """
+    Count reads
+    """
+    input:  
+        expand("alignment/{sample}.srt.bam", sample=sample_list),
+    output: 
+        "featurecounts/featurecounts.readcounts.tsv",
+        "featurecounts/featurecounts.readcounts.ann.tsv",
+        "featurecounts/featurecounts.readcounts_tpm.tsv",
+        "featurecounts/featurecounts.readcounts_tpm.ann.tsv",
+        "featurecounts/featurecounts.readcounts_rpkm.tsv",
+        "featurecounts/featurecounts.readcounts_rpkm.ann.tsv",
+        "featurecounts/featurecounts.readcounts_fpkm.tsv",
+        "featurecounts/featurecounts.readcounts_fpkm.ann.tsv",
     params:
         featurecounts = config['featurecounts_path'],
         layout = config["layout"],
@@ -376,12 +473,21 @@ rule featurecounts:
         fc_ann_script = config['featurecounts_annscript'],
     conda:
         "env_config/featurecounts.yaml",
-
-    resources: cpus="10", maxtime="8:00:00", mem_mb=100000,
-
+    resources: cpus="10", maxtime="8:00:00", mem_mb="100gb",
+    message: "Counting reads with FeatureCounts."
     shell: """
-        {params.featurecounts} -T 32 {params.pair_flag} -s {params.strand}  -a {params.gtf} -o featurecounts/featurecounts.readcounts.raw.tsv {input}
+        {params.featurecounts} \
+            -T 32 \
+            {params.pair_flag} \
+            -s {params.strand} \
+            -a {params.gtf} \
+            -o featurecounts/featurecounts.readcounts.raw.tsv \
+            {input}
+
+        #----- Clean
         sed s/"alignment\/"//g featurecounts/featurecounts.readcounts.raw.tsv| sed s/".srt.bam"//g| tail -n +2 > featurecounts/featurecounts.readcounts.tsv
+        
+        #----- Annotate
         python {params.fc_tpm_script} featurecounts/featurecounts.readcounts.tsv {params.layout}
         python {params.fc_ann_script} {params.gtf} featurecounts/featurecounts.readcounts.tsv > featurecounts/featurecounts.readcounts.ann.tsv
         python {params.fc_ann_script} {params.gtf} featurecounts/featurecounts.readcounts_tpm.tsv > featurecounts/featurecounts.readcounts_tpm.ann.tsv
@@ -398,8 +504,11 @@ rule featurecounts:
     """
 
 rule pca_plots:
-    input: "featurecounts/featurecounts.readcounts.tsv",
-
+    """
+    PCA
+    """
+    input: 
+        "featurecounts/featurecounts.readcounts.tsv",
     output:
         "plots/PCA_top_PC1_vs_PC2.png",
         "plots/PCA_top_PCA_variance_bar.png",
@@ -408,20 +517,22 @@ rule pca_plots:
     conda:
         "env_config/pcaplot.yaml",
     resources: cpus="1", maxtime="1:00:00", mem_mb=2000,
+    message: "Running PCA"
     shell: """
         python {params.pca_plot_script} \
         featurecounts/featurecounts.readcounts.tsv \
         plots
     """
 
-
-
 ####
 # Reference path checking
 # This rule is not run by the default Snakemake target.
 # To run these checks, run snakemake -s Snakefile check_refs
 ####
 rule check_refs:
+    """
+    Verify ref paths
+    """
     params:
         ref_gtf = config["annotation_gtf"],
         aligner_index = config["aligner_index"],
@@ -430,7 +541,8 @@ rule check_refs:
         picard_rrna_list = config["picard_rrna_list"],
         run_rsem = config["run_rsem"],
         rsem_ref = config["rsem_ref_path"],
-    resources: cpus="1", maxtime="1:00:00", mem_mb=2000,
+    resources: cpus="1", maxtime="1:00:00", mem_mb="2gb",
+    message: "Checking reference paths..."
     shell: """   
         
         echo "\nChecking for reference annotation GTF file..."
@@ -498,15 +610,15 @@ rule check_refs:
 
 """
 
-
-
-
 ####
 # Automatic Reference building
 # This rule is not run by the default Snakemake target.
 # To run these build commands, run snakemake -s Snakefile build_refs
 ####
 rule build_refs:
+    """
+    Building refs
+    """
     params:
         ref_fa = config["reference_fa"],
         ref_gtf = config["annotation_gtf"],        
@@ -517,7 +629,8 @@ rule build_refs:
         rsem_prepare_path = config["rsem_prep_ref_path"],
     conda:
           "env_config/build_refs.yaml",
-    resources: cpus="12", maxtime="8:00:00", mem_mb=48000,
+    resources: cpus="12", maxtime="8:00:00", mem_mb="48gb",
+    message: "Building references."
     shell: """
             REF_NAME=`basename {params.ref_fa} .fa`
             mkdir -p ref/pipeline_refs
@@ -576,3 +689,4 @@ fi
 
 
 """
+
diff --git a/env_config/pcaplot.yaml b/env_config/pcaplot.yaml
index 64264e1..b375718 100644
--- a/env_config/pcaplot.yaml
+++ b/env_config/pcaplot.yaml
@@ -9,5 +9,4 @@ dependencies:
   - seaborn
   - plotnine
   - pandas
-  - fastcluster
-
+  - fastcluster
\ No newline at end of file
diff --git a/env_config/ribodetector.yaml b/env_config/ribodetector.yaml
new file mode 100644
index 0000000..c0cdbe9
--- /dev/null
+++ b/env_config/ribodetector.yaml
@@ -0,0 +1,10 @@
+name: ribodetector
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - python=3.8
+  - pip
+  - pip:
+      - ribodetector==0.2.7
diff --git a/job.script.sh b/job.script.sh
index 5e91b3a..acbac46 100644
--- a/job.script.sh
+++ b/job.script.sh
@@ -1,25 +1,13 @@
 #!/bin/bash
 
-# Name of the job
-#SBATCH --job-name=RNAseq.preprocess
-
-# Number of compute nodes
+#SBATCH --job-name=Hixon_RNA
 #SBATCH --nodes=1
-
-# specify que to submit to (preempt1 only contains q10 DAC node)
-#SBATCH --partition=standard
-
-# specify account you are submitting from
-#SBATCH --account=nccc
-
-# Walltime (job duration)
+#SBATCH --partition=preempt1
+#SBATCH --account=dac
 #SBATCH --time=60:00:00
-
-# Email address
-#SBATCH --mail-user=XXXXXXXXX@dartmouth.edu
-
-# Email notifications (comma-separated options: BEGIN,END,FAIL)
+#SBATCH --mail-user=f007qps@dartmouth.edu
 #SBATCH --mail-type=FAIL
+#SBATCH --output=_Hixon_RNA_%j.log
 
 #----- Source conda environment
 source /optnfs/common/miniconda3/etc/profile.d/conda.sh
@@ -29,9 +17,10 @@ conda activate /dartfs/rc/nosnapshots/G/GMBSR_refs/envs/snakemake
 mkdir -p slurm_logs
 
 #----- Call Snakemake
-snakemake -s Snakefile  \
-	--conda-frontend conda \
+snakemake -s Snakefile \
 	--use-conda \
+	--use-singularity \
+	--singularity-args "--bind /dartfs,/optnfs" \
 	--conda-prefix /dartfs/rc/nosnapshots/G/GMBSR_refs/envs/DAC-RNAseq-pipeline \
 	--profile cluster_profile \
 	--rerun-incomplete \
diff --git a/multiqc_config.yaml b/multiqc_config.yaml
index d09094d..97fcb72 100644
--- a/multiqc_config.yaml
+++ b/multiqc_config.yaml
@@ -1,6 +1,23 @@
-report_header_info:
-  - Contact E-mail:  DataAnalyticsCore@groups.dartmouth.edu"
-  - Application Type: "X RNA-seq"
+# ============================================================================
+# CUSTOM BRANDING - Report Title, Subtitle, and Contact
+# ============================================================================
+
+# Main report title (appears at top of report)
+title: "GDSC-RNASeq Pipeline v1.4"
+
+# Subtitle text (appears below title)
+subtitle: "RNA-Seq Preprocessing"
+
+custom_logo: "img/cqb_logo.jpg"
+custom_logo_url: "https://sites.dartmouth.edu/cqb/files/2019/10/Logo-and-bringing-smaller-300.png"
+
+# Introductory text (appears at top of report, supports HTML/Markdown)
+intro_text: |
+  <p style="text-align: center; font-size: 1.1em;">
+    <em>Authors: Dartmouth Genomic Data Science Core</em><br>
+    For correspondence contact: <a href="mailto:gdsc@groups.dartmouth.edu">gdsc@groups.dartmouth.edu</a>
+  </p>
+
 
 extra_fn_clean_exts:
   - ".srt"
diff --git a/scripts/ribo_stats.sh b/scripts/ribo_stats.sh
new file mode 100644
index 0000000..fa80011
--- /dev/null
+++ b/scripts/ribo_stats.sh
@@ -0,0 +1,79 @@
+#!/usr/bin/env bash
+set -euo pipefail
+shopt -s nullglob
+
+if [[ $# -ne 1 ]]; then
+  echo "Usage: $0 <ribodetector_root_dir>" >&2
+  exit 1
+fi
+
+RESULTS_DIR="$1"
+
+if [[ ! -d "$RESULTS_DIR" ]]; then
+  echo "Error: '$RESULTS_DIR' is not a directory" >&2
+  exit 1
+fi
+
+out="ribodetector/rrna_norrna_pct_mqc.tsv"
+mkdir -p "$(dirname "$out")"
+
+# write MultiQC custom-content header
+cat > "$out" <<'EOF'
+# id: 'ribodetector'
+# section_name: 'Ribodetector rRNA Detection'
+# description: 'Percentage of reads classified as rRNA by ribodetector. High values indicate poor rRNA depletion during library prep.'
+# format: 'tsv'
+# plot_type: 'table'
+# pconfig:
+#     id: 'ribodetector_table'
+#     title: 'Ribodetector: rRNA contamination'
+# headers:
+#     total_reads:
+#         title: 'Total reads'
+#         description: 'Total input reads processed by ribodetector'
+#         format: '{:,.0f}'
+#         scale: 'Blues'
+#     rrna_reads:
+#         title: 'rRNA reads'
+#         description: 'Reads classified as rRNA (filtered out)'
+#         format: '{:,.0f}'
+#         scale: 'Reds'
+#     pct_rrna:
+#         title: '% rRNA'
+#         description: 'Percentage of input reads classified as rRNA'
+#         format: '{:,.2f}'
+#         suffix: '%'
+#         min: 0
+#         max: 100
+#         scale: 'RdYlGn-rev'
+EOF
+
+# append column header and data
+printf "sample\ttotal_reads\trrna_reads\tpct_rrna\n" >> "$out"
+
+for d in "$RESULTS_DIR"/*/; do
+  sample=$(basename "$d")
+  log="$d/stderr.log"
+
+  [[ -f "$log" ]] || { echo "skip: no stderr.log in $d" >&2; continue; }
+
+  total=$(awk '
+    { gsub(/\x1B\[[0-9;]*[A-Za-z]/,"") }
+    match($0,/([0-9]+)[[:space:]]+sequences loaded/,a) { print a[1]; exit }
+  ' "$log")
+
+  rrna=$(awk '
+    { gsub(/\x1B\[[0-9;]*[A-Za-z]/,"") }
+    match($0,/Detected[[:space:]]+([0-9]+)[[:space:]]+rRNA sequences/,a) { print a[1]; exit }
+  ' "$log")
+
+  [[ -n "${total:-}" && -n "${rrna:-}" ]] || { echo "skip: missing counts in $log" >&2; continue; }
+  [[ "$total" =~ ^[0-9]+$ && "$total" -gt 0 ]] || { echo "skip: bad total in $log" >&2; continue; }
+  [[ "$rrna"  =~ ^[0-9]+$ ]] || { echo "skip: bad rrna in $log" >&2; continue; }
+
+  pct=$(awk -v r="$rrna" -v t="$total" 'BEGIN{printf "%.4f", 100*r/t}')
+
+  printf "%s\t%s\t%s\t%s\n" "$sample" "$total" "$rrna" "$pct" >> "$out"
+done
+
+echo "Saved: $out"
\ No newline at end of file

From 695b049985a386a468d55cf9cac74b5c341fe562 Mon Sep 17 00:00:00 2001
From: mikemartinez99 <mike.j.martinez99@gmail.com>
Date: Fri, 17 Apr 2026 14:14:53 -0400
Subject: [PATCH 02/11] Ribodetector params in prebuilt configs

---
 prebuilt_configs/human_config_paired_hisat.yaml     | 3 +++
 prebuilt_configs/human_config_paired_star.yaml      | 3 +++
 prebuilt_configs/human_config_paired_star_rsem.yaml | 3 +++
 prebuilt_configs/human_config_single_hisat.yaml     | 3 +++
 prebuilt_configs/human_config_single_star.yaml      | 3 +++
 prebuilt_configs/mouse_config_paired_hisat.yaml     | 2 ++
 prebuilt_configs/mouse_config_paired_star.yaml      | 2 ++
 prebuilt_configs/mouse_config_paired_star_rsem.yaml | 2 ++
 prebuilt_configs/mouse_config_single_hisat.yaml     | 2 ++
 prebuilt_configs/mouse_config_single_star.yaml      | 2 ++
 10 files changed, 25 insertions(+)

diff --git a/prebuilt_configs/human_config_paired_hisat.yaml b/prebuilt_configs/human_config_paired_hisat.yaml
index ae659bf..d55dd0f 100644
--- a/prebuilt_configs/human_config_paired_hisat.yaml
+++ b/prebuilt_configs/human_config_paired_hisat.yaml
@@ -8,6 +8,9 @@ layout: "paired"
 #cutadapt_nextseq_flag: "--nextseq-trim=20"
 cutadapt_nextseq_flag: ""
 
+remove_rRNA: true
+read_length: 51
+
 
 #############################################
 # Software Paths for use with "--use-conda" #
diff --git a/prebuilt_configs/human_config_paired_star.yaml b/prebuilt_configs/human_config_paired_star.yaml
index 1ac92af..20908b1 100644
--- a/prebuilt_configs/human_config_paired_star.yaml
+++ b/prebuilt_configs/human_config_paired_star.yaml
@@ -8,6 +8,9 @@ layout: "paired"
 #cutadapt_nextseq_flag: "--nextseq-trim=20"
 cutadapt_nextseq_flag: ""
 
+remove_rRNA: true
+read_length: 51
+
 
 #############################################
 # Software Paths for use with "--use-conda" #
diff --git a/prebuilt_configs/human_config_paired_star_rsem.yaml b/prebuilt_configs/human_config_paired_star_rsem.yaml
index 8739c6c..d070638 100644
--- a/prebuilt_configs/human_config_paired_star_rsem.yaml
+++ b/prebuilt_configs/human_config_paired_star_rsem.yaml
@@ -8,6 +8,9 @@ layout: "paired"
 #cutadapt_nextseq_flag: "--nextseq-trim=20"
 cutadapt_nextseq_flag: ""
 
+remove_rRNA: true
+read_length: 51
+
 
 #############################################
 # Software Paths for use with "--use-conda" #
diff --git a/prebuilt_configs/human_config_single_hisat.yaml b/prebuilt_configs/human_config_single_hisat.yaml
index 7298aee..aa7c6b7 100644
--- a/prebuilt_configs/human_config_single_hisat.yaml
+++ b/prebuilt_configs/human_config_single_hisat.yaml
@@ -8,6 +8,9 @@ layout: "single"
 #cutadapt_nextseq_flag: "--nextseq-trim=20"
 cutadapt_nextseq_flag: ""
 
+remove_rRNA: true
+read_length: 51
+
 
 #############################################
 # Software Paths for use with "--use-conda" #
diff --git a/prebuilt_configs/human_config_single_star.yaml b/prebuilt_configs/human_config_single_star.yaml
index b169135..9f9f6c7 100644
--- a/prebuilt_configs/human_config_single_star.yaml
+++ b/prebuilt_configs/human_config_single_star.yaml
@@ -8,6 +8,9 @@ layout: "single"
 #cutadapt_nextseq_flag: "--nextseq-trim=20"
 cutadapt_nextseq_flag: ""
 
+remove_rRNA: true
+read_length: 51
+
 
 #############################################
 # Software Paths for use with "--use-conda" #
diff --git a/prebuilt_configs/mouse_config_paired_hisat.yaml b/prebuilt_configs/mouse_config_paired_hisat.yaml
index c60cd5d..ec3b9a6 100644
--- a/prebuilt_configs/mouse_config_paired_hisat.yaml
+++ b/prebuilt_configs/mouse_config_paired_hisat.yaml
@@ -8,6 +8,8 @@ layout: "paired"
 #cutadapt_nextseq_flag: "--nextseq-trim=20"
 cutadapt_nextseq_flag: ""
 
+remove_rRNA: true
+read_length: 51
 
 #############################################
 # Software Paths for use with "--use-conda" #
diff --git a/prebuilt_configs/mouse_config_paired_star.yaml b/prebuilt_configs/mouse_config_paired_star.yaml
index 9a2fe4b..772a54b 100644
--- a/prebuilt_configs/mouse_config_paired_star.yaml
+++ b/prebuilt_configs/mouse_config_paired_star.yaml
@@ -8,6 +8,8 @@ layout: "paired"
 #cutadapt_nextseq_flag: "--nextseq-trim=20"
 cutadapt_nextseq_flag: ""
 
+remove_rRNA: true
+read_length: 51
 
 #############################################
 # Software Paths for use with "--use-conda" #
diff --git a/prebuilt_configs/mouse_config_paired_star_rsem.yaml b/prebuilt_configs/mouse_config_paired_star_rsem.yaml
index ebd4202..6214610 100644
--- a/prebuilt_configs/mouse_config_paired_star_rsem.yaml
+++ b/prebuilt_configs/mouse_config_paired_star_rsem.yaml
@@ -8,6 +8,8 @@ layout: "paired"
 #cutadapt_nextseq_flag: "--nextseq-trim=20"
 cutadapt_nextseq_flag: ""
 
+remove_rRNA: true
+read_length: 51
 
 #############################################
 # Software Paths for use with "--use-conda" #
diff --git a/prebuilt_configs/mouse_config_single_hisat.yaml b/prebuilt_configs/mouse_config_single_hisat.yaml
index 4b74b7b..870bf3c 100644
--- a/prebuilt_configs/mouse_config_single_hisat.yaml
+++ b/prebuilt_configs/mouse_config_single_hisat.yaml
@@ -8,6 +8,8 @@ layout: "single"
 #cutadapt_nextseq_flag: "--nextseq-trim=20"
 cutadapt_nextseq_flag: ""
 
+remove_rRNA: true
+read_length: 51
 
 #############################################
 # Software Paths for use with "--use-conda" #
diff --git a/prebuilt_configs/mouse_config_single_star.yaml b/prebuilt_configs/mouse_config_single_star.yaml
index 017b430..0c62dac 100644
--- a/prebuilt_configs/mouse_config_single_star.yaml
+++ b/prebuilt_configs/mouse_config_single_star.yaml
@@ -8,6 +8,8 @@ layout: "single"
 #cutadapt_nextseq_flag: "--nextseq-trim=20"
 cutadapt_nextseq_flag: ""
 
+remove_rRNA: true
+read_length: 51
 
 #############################################
 # Software Paths for use with "--use-conda" #

From f44fddb95cbb1b7f95dd13f5b541f75b287b4af1 Mon Sep 17 00:00:00 2001
From: mikemartinez99 <mike.j.martinez99@gmail.com>
Date: Fri, 17 Apr 2026 18:13:49 -0400
Subject: [PATCH 03/11] update tests

---
 .github/workflows/test.yml | 60 ++++++++++++++++++++++++++------------
 1 file changed, 42 insertions(+), 18 deletions(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index f6eeff9..ca994c3 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -6,9 +6,9 @@ name: Snakemake_tests
 on:
   # Triggers the workflow on push or pull request events but only for the branches listed
   push:
-    branches: [ master, dev-tim, dev-andrew-github-actions ]
+    branches: [ master, dev-tim, dev-andrew-github-actions, dev-ribodetector ]
   pull_request:
-    branches: [ master, dev-tim, dev-andrew-github-actions ]
+    branches: [ master, dev-tim, dev-andrew-github-actions, dev-ribodetector ]
 
   # Including this block the allow the workflow to be run manually from the GitHub Actions tab
   workflow_dispatch:
@@ -29,12 +29,16 @@ jobs:
     # Steps to be run
     steps:
       - uses: actions/checkout@v2
+      - name: Install Apptainer
+        uses: eWaterCycle/setup-apptainer@v2
+        with:
+          apptainer-version: 1.3.6
       - name: Create_conda_env
         uses: snakemake/snakemake-github-action@v1
         with:
           directory: '.'
           snakefile: 'Snakefile'
-          args: '--cores 4 --use-conda --conda-create-envs-only'
+          args: '--cores 4 --use-conda --use-singularity --conda-create-envs-only'
       - name: tar conda envs
         run: |
           tar -cf ./conda_envs_archive.tar ./.snakemake
@@ -88,12 +92,16 @@ jobs:
 
     steps:
       - uses: actions/checkout@v2
+      - name: Install Apptainer
+        uses: eWaterCycle/setup-apptainer@v2
+        with:
+          apptainer-version: 1.3.6
       - name: Hisat_single_config_build
         uses: snakemake/snakemake-github-action@v1
         with:
           directory: '.'
           snakefile: 'Snakefile'
-          args: 'build_refs --cores 4 --use-conda --configfile tests/test_config_single_hisat.yaml'
+          args: 'build_refs --cores 4 --use-conda --use-singularity --configfile tests/test_config_single_hisat.yaml'
       - name: Hisat_single_config_configure
         run: cat ref/pipeline_refs/hg38_chr567_100k.entries.yaml >> tests/test_config_single_hisat.yaml
       - name: Hisat_single_config_check
@@ -101,13 +109,13 @@ jobs:
         with:
           directory: '.'
           snakefile: 'Snakefile'
-          args: 'check_refs --cores 4 --use-conda --configfile tests/test_config_single_hisat.yaml'
+          args: 'check_refs --cores 4 --use-conda --use-singularity --configfile tests/test_config_single_hisat.yaml'
       - name: Hisat_single_config_run
         uses: snakemake/snakemake-github-action@v1
         with:
           directory: '.'
           snakefile: 'Snakefile'
-          args: '--cores 4 --use-conda --configfile tests/test_config_single_hisat.yaml'
+          args: '--cores 4 --use-conda --use-singularity --configfile tests/test_config_single_hisat.yaml'
 
   # paired_hisat
   #
@@ -118,12 +126,16 @@ jobs:
 
     steps:
       - uses: actions/checkout@v2
+      - name: Install Apptainer
+        uses: eWaterCycle/setup-apptainer@v2
+        with:
+          apptainer-version: 1.3.6
       - name: Hisat_paired_config_build
         uses: snakemake/snakemake-github-action@v1
         with:
           directory: '.'
           snakefile: 'Snakefile'
-          args: 'build_refs --cores 4 --use-conda --configfile tests/test_config_paired_hisat.yaml'
+          args: 'build_refs --cores 4 --use-conda --use-singularity --configfile tests/test_config_paired_hisat.yaml'
       - name: Hisat_paired_config_configure
         run: cat ref/pipeline_refs/hg38_chr567_100k.entries.yaml >> tests/test_config_paired_hisat.yaml
       - name: Hisat_paired_config_check
@@ -131,13 +143,13 @@ jobs:
         with:
           directory: '.'
           snakefile: 'Snakefile'
-          args: 'check_refs --cores 4 --use-conda --configfile tests/test_config_paired_hisat.yaml'
+          args: 'check_refs --cores 4 --use-conda --use-singularity --configfile tests/test_config_paired_hisat.yaml'
       - name: Hisat_paired_config_run
         uses: snakemake/snakemake-github-action@v1
         with:
           directory: '.'
           snakefile: 'Snakefile'
-          args: '--cores 4 --use-conda --configfile tests/test_config_paired_hisat.yaml'
+          args: '--cores 4 --use-conda --use-singularity --configfile tests/test_config_paired_hisat.yaml'
 
   # single_star
   #
@@ -147,12 +159,16 @@ jobs:
 
     steps:
       - uses: actions/checkout@v2
+      - name: Install Apptainer
+        uses: eWaterCycle/setup-apptainer@v2
+        with:
+          apptainer-version: 1.3.6
       - name: STAR_single_config_build
         uses: snakemake/snakemake-github-action@v1
         with:
           directory: '.'
           snakefile: 'Snakefile'
-          args: 'build_refs --cores 4 --use-conda --configfile tests/test_config_single_star.yaml'
+          args: 'build_refs --cores 4 --use-conda --use-singularity --configfile tests/test_config_single_star.yaml'
       - name: STAR_single_config_configure
         run: cat ref/pipeline_refs/hg38_chr567_100k.entries.yaml >> tests/test_config_single_star.yaml
       - name: STAR_single_config_check
@@ -160,13 +176,13 @@ jobs:
         with:
           directory: '.'
           snakefile: 'Snakefile'
-          args: 'check_refs --cores 4 --use-conda --configfile tests/test_config_single_star.yaml'
+          args: 'check_refs --cores 4 --use-conda --use-singularity --configfile tests/test_config_single_star.yaml'
       - name: STAR_single_config_run
         uses: snakemake/snakemake-github-action@v1
         with:
           directory: '.'
           snakefile: 'Snakefile'
-          args: '--cores 4 --use-conda --configfile tests/test_config_single_star.yaml'
+          args: '--cores 4 --use-conda --use-singularity --configfile tests/test_config_single_star.yaml'
 
   # paired_star
   #
@@ -177,12 +193,16 @@ jobs:
 
     steps:
       - uses: actions/checkout@v2
+      - name: Install Apptainer
+        uses: eWaterCycle/setup-apptainer@v2
+        with:
+          apptainer-version: 1.3.6
       - name: STAR_paired_config_build
         uses: snakemake/snakemake-github-action@v1
         with:
           directory: '.'
           snakefile: 'Snakefile'
-          args: 'build_refs --cores 4 --use-conda --configfile tests/test_config_paired_star.yaml'
+          args: 'build_refs --cores 4 --use-conda --use-singularity --configfile tests/test_config_paired_star.yaml'
       - name: STAR_paired_config_configure
         run: cat ref/pipeline_refs/hg38_chr567_100k.entries.yaml >> tests/test_config_paired_star.yaml
       - name: STAR_paired_config_check
@@ -190,13 +210,13 @@ jobs:
         with:
           directory: '.'
           snakefile: 'Snakefile'
-          args: 'check_refs --cores 4 --use-conda --configfile tests/test_config_paired_star.yaml'
+          args: 'check_refs --cores 4 --use-conda --use-singularity --configfile tests/test_config_paired_star.yaml'
       - name: STAR_paired_config_run
         uses: snakemake/snakemake-github-action@v1
         with:
           directory: '.'
           snakefile: 'Snakefile'
-          args: '--cores 4 --use-conda --configfile tests/test_config_paired_star.yaml'
+          args: '--cores 4 --use-conda --use-singularity --configfile tests/test_config_paired_star.yaml'
 
   # paired_star with rsem
   #
@@ -207,12 +227,16 @@ jobs:
 
     steps:
       - uses: actions/checkout@v2
+      - name: Install Apptainer
+        uses: eWaterCycle/setup-apptainer@v2
+        with:
+          apptainer-version: 1.3.6
       - name: STAR_RSEM_paired_config_build
         uses: snakemake/snakemake-github-action@v1
         with:
           directory: '.'
           snakefile: 'Snakefile'
-          args: 'build_refs --cores 4 --use-conda --configfile tests/test_config_paired_star_rsem.yaml'
+          args: 'build_refs --cores 4 --use-conda --use-singularity --configfile tests/test_config_paired_star_rsem.yaml'
       - name: STAR_RSEM_paired_config_configure
         run: cat ref/pipeline_refs/hg38_chr567_100k.entries.yaml >> tests/test_config_paired_star_rsem.yaml
       - name: STAR_RSEM_paired_config_check
@@ -220,13 +244,13 @@ jobs:
         with:
           directory: '.'
           snakefile: 'Snakefile'
-          args: 'check_refs --cores 4 --use-conda --configfile tests/test_config_paired_star_rsem.yaml'
+          args: 'check_refs --cores 4 --use-conda --use-singularity --configfile tests/test_config_paired_star_rsem.yaml'
       - name: STAR_RSEM_paired_config_run
         uses: snakemake/snakemake-github-action@v1
         with:
           directory: '.'
           snakefile: 'Snakefile'
-          args: '--cores 4 --use-conda --configfile tests/test_config_paired_star_rsem.yaml'
+          args: '--cores 4 --use-conda --use-singularity --configfile tests/test_config_paired_star_rsem.yaml'
 
   #debug_environment_info
   #

From 8e8889fefe6539555fbf36512ab4ed5cbf5cbdab Mon Sep 17 00:00:00 2001
From: mikemartinez99 <mike.j.martinez99@gmail.com>
Date: Fri, 17 Apr 2026 18:25:15 -0400
Subject: [PATCH 04/11] convert all mem_mb to integers

---
 Snakefile | 31 +++++++++++++++----------------
 1 file changed, 15 insertions(+), 16 deletions(-)

diff --git a/Snakefile b/Snakefile
index 8f7c9dc..9abd951 100644
--- a/Snakefile
+++ b/Snakefile
@@ -6,7 +6,6 @@ import pandas as pd
 
 #----- set config file
 configfile: "config.yaml"
-print(config)
 
 #----- read in sample data
 samples_df = pd.read_csv(config["sample_csv"]).set_index("sample_id", drop=False)
@@ -53,7 +52,7 @@ rule all:
         expand("qc/{sample}", sample=sample_list)
     conda:
         "env_config/multiqc.yaml",
-    resources: cpus="10", maxtime="2:00:00", mem_mb="60gb",
+    resources: cpus="10", maxtime="2:00:00", mem_mb=60000,
     params:
         layout=config["layout"],
         multiqc=config["multiqc_path"],
@@ -113,7 +112,7 @@ rule trimming:
         nextseq_flag = config["cutadapt_nextseq_flag"]
     conda:
         "env_config/cutadapt.yaml",
-    resources: cpus="10", maxtime="2:00:00", mem_mb="60gb",
+    resources: cpus="10", maxtime="2:00:00", mem_mb=60000,
     message: "Trimming {wildcards.sample} reads with cutadapt."
     shell: """
         if  [ "{params.layout}" == "paired" ] 
@@ -153,7 +152,7 @@ rule ribodetector:
         rrna_reads_1 = "ribodetector/{sample}/{sample}.rrna.1.fq.gz",
         filtered_read_2 = "ribodetector/{sample}/{sample}.nonrrna.2.fq.gz" if config["layout"]=="paired" else [],
         rrna_reads_2 = "ribodetector/{sample}/{sample}.rrna.2.fq.gz" if config["layout"]=="paired" else []
-    resources: cpus="10", maxtime="3:00:00", mem_mb="300gb"
+    resources: cpus="10", maxtime="3:00:00", mem_mb=300000
     message: "Removing rRNA sequences for {wildcards.sample} reads with ribodetector."
     conda:
     	"env_config/ribodetector.yaml"
@@ -196,7 +195,7 @@ rule ribodetector_mqc_summary:
     params:
         script = "scripts/ribodetector_mqc.sh",
         ribo_dir = "ribodetector"
-    resources: cpus="1", maxtime="10:00", mem_mb="2gb"
+    resources: cpus="1", maxtime="10:00", mem_mb=2000
     message: "Generating ribodetector report."
     shell: """
         bash scripts/ribo_stats.sh ribodetector
@@ -216,7 +215,7 @@ if config["aligner_name"]=="star":
           samtools = config["samtools_path"],
       conda:
           "env_config/alignment.yaml",
-      resources: cpus="10", maxtime="8:00:00", mem_mb="120gb",
+      resources: cpus="10", maxtime="8:00:00", mem_mb=120000,
       message: "Checking STAR alignment index"
       shell: """
         align_folder="sample_ref/STAR_index"
@@ -258,7 +257,7 @@ if config["aligner_name"]=="star":
           readFilesIn = R1_FASTQ_INPUT + (f" {R2_FASTQ_INPUT}" if config["layout"] == "paired" else '')
       conda:
           "env_config/alignment.yaml",
-      resources: cpus="5", maxtime="8:00:00", mem_mb="100gb",
+      resources: cpus="5", maxtime="8:00:00", mem_mb=100000,
       message: "aligning {wildcards.sample} reads with STAR."
       shell: """
         align_folder=`cat alignment/index_status.txt`
@@ -310,7 +309,7 @@ if config["aligner_name"]=="hisat":
           fastq_2 = '-2 trimming/{sample}.R2.trim.fastq.gz'  if config['layout']=='paired' else '',   
       conda:
           "env_config/alignment.yaml",
-      resources: cpus="4", maxtime="8:00:00", mem_mb="40gb",
+      resources: cpus="4", maxtime="8:00:00", mem_mb=40000,
       message: "aligning {wildcards.sample} reads with hisat2."
       shell: """
           {params.hisat2} \
@@ -344,7 +343,7 @@ rule rustqc:
         gtf = config["annotation_gtf"],
         paired_flag = '-p' if config['layout']=='paired' else ''
     threads: 4
-    resources: cpus="4", maxtime="8:00:00", mem_mb="40gb",
+    resources: cpus="4", maxtime="8:00:00", mem_mb=40000,
     message: "Running comprehensive QC for {wildcards.sample} with RustQC."
     log: "qc/{sample}/{sample}.log"
     shell: """
@@ -373,7 +372,7 @@ rule picard_markdup:
         picard = config['picard_path'],
     conda:
         "env_config/picard.yaml",
-    resources: cpus="2", maxtime="30:00", mem_mb="20gb",
+    resources: cpus="2", maxtime="30:00", mem_mb=20000,
     message: "Deduplicating reads for {wildcards.sample} reads with Picard."
     shell: """
             {params.picard} -Xmx2G -Xms2G  \
@@ -404,7 +403,7 @@ rule picard_collectmetrics:
         strand = config['picard_strand'],
     conda:
         "env_config/picard.yaml",
-    resources: cpus="2", maxtime="8:00:00", mem_mb="20gb",
+    resources: cpus="2", maxtime="8:00:00", mem_mb=20000,
     message: "Collecting RNASeq metrics for {wildcards.sample} reads with Picard."
     shell: """
         {params.picard} -Xmx2G -Xms2G \
@@ -434,7 +433,7 @@ rule rsem:
         rsem_paired_flag = '--paired-end' if config["layout"]=='paired' else '',
     conda:
         "env_config/rsem.yaml",
-    resources: cpus="10", maxtime="8:00:00", mem_mb="60gb",
+    resources: cpus="10", maxtime="8:00:00", mem_mb=60000,
     message: "Counting transcript isoforms for {wildcards.sample} reads with RSEM."
     shell: """   
         {params.rsem_calc_exp_path} \
@@ -473,7 +472,7 @@ rule featurecounts:
         fc_ann_script = config['featurecounts_annscript'],
     conda:
         "env_config/featurecounts.yaml",
-    resources: cpus="10", maxtime="8:00:00", mem_mb="100gb",
+    resources: cpus="10", maxtime="8:00:00", mem_mb=100000,
     message: "Counting reads with FeatureCounts."
     shell: """
         {params.featurecounts} \
@@ -485,7 +484,7 @@ rule featurecounts:
             {input}
 
         #----- Clean
-        sed s/"alignment\/"//g featurecounts/featurecounts.readcounts.raw.tsv| sed s/".srt.bam"//g| tail -n +2 > featurecounts/featurecounts.readcounts.tsv
+        sed 's|alignment/||g' featurecounts/featurecounts.readcounts.raw.tsv| sed 's|.srt.bam||g'| tail -n +2 > featurecounts/featurecounts.readcounts.tsv
         
         #----- Annotate
         python {params.fc_tpm_script} featurecounts/featurecounts.readcounts.tsv {params.layout}
@@ -541,7 +540,7 @@ rule check_refs:
         picard_rrna_list = config["picard_rrna_list"],
         run_rsem = config["run_rsem"],
         rsem_ref = config["rsem_ref_path"],
-    resources: cpus="1", maxtime="1:00:00", mem_mb="2gb",
+    resources: cpus="1", maxtime="1:00:00", mem_mb=2000,
     message: "Checking reference paths..."
     shell: """   
         
@@ -629,7 +628,7 @@ rule build_refs:
         rsem_prepare_path = config["rsem_prep_ref_path"],
     conda:
           "env_config/build_refs.yaml",
-    resources: cpus="12", maxtime="8:00:00", mem_mb="48gb",
+    resources: cpus="12", maxtime="8:00:00", mem_mb=48000,
     message: "Building references."
     shell: """
             REF_NAME=`basename {params.ref_fa} .fa`

From 3ae090dd906447acb19a512c7164d4a4a132cb0e Mon Sep 17 00:00:00 2001
From: mikemartinez99 <mike.j.martinez99@gmail.com>
Date: Fri, 17 Apr 2026 18:33:04 -0400
Subject: [PATCH 05/11] Fix read_length inclusion in main config

---
 config.yaml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/config.yaml b/config.yaml
index 847842f..f5b1790 100644
--- a/config.yaml
+++ b/config.yaml
@@ -16,6 +16,10 @@ aligner_name: "hisat" #'hisat' or 'star'
 # rsem choice
 run_rsem: "no"
 
+# Run rrna filtering?
+remove_rRNA: true
+read_length: 51
+
 #############################################
 #Strandedness settings - these should match #
 #############################################

From 36d47e770626817d8283643b467ca5cf58213e86 Mon Sep 17 00:00:00 2001
From: mikemartinez99 <mike.j.martinez99@gmail.com>
Date: Fri, 17 Apr 2026 18:46:56 -0400
Subject: [PATCH 06/11] fix gawk error in ribo_stats to work with github action

---
 scripts/ribo_stats.sh | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/scripts/ribo_stats.sh b/scripts/ribo_stats.sh
index fa80011..eb84240 100644
--- a/scripts/ribo_stats.sh
+++ b/scripts/ribo_stats.sh
@@ -57,15 +57,11 @@ for d in "$RESULTS_DIR"/*/; do
 
   [[ -f "$log" ]] || { echo "skip: no stderr.log in $d" >&2; continue; }
 
-  total=$(awk '
-    { gsub(/\x1B\[[0-9;]*[A-Za-z]/,"") }
-    match($0,/([0-9]+)[[:space:]]+sequences loaded/,a) { print a[1]; exit }
-  ' "$log")
+  total=$(sed 's/\x1B\[[0-9;]*[A-Za-z]//g' "$log" \
+    | grep -oP '[0-9]+(?=\s+sequences loaded)' | head -1)
 
-  rrna=$(awk '
-    { gsub(/\x1B\[[0-9;]*[A-Za-z]/,"") }
-    match($0,/Detected[[:space:]]+([0-9]+)[[:space:]]+rRNA sequences/,a) { print a[1]; exit }
-  ' "$log")
+  rrna=$(sed 's/\x1B\[[0-9;]*[A-Za-z]//g' "$log" \
+    | grep -oP '(?<=Detected\s)[0-9]+(?=\s+rRNA sequences)' | head -1)
 
   [[ -n "${total:-}" && -n "${rrna:-}" ]] || { echo "skip: missing counts in $log" >&2; continue; }
   [[ "$total" =~ ^[0-9]+$ && "$total" -gt 0 ]] || { echo "skip: bad total in $log" >&2; continue; }

From 391c7d83253484a77d837360f7a0bee6f626a286 Mon Sep 17 00:00:00 2001
From: mikemartinez99 <mike.j.martinez99@gmail.com>
Date: Tue, 21 Apr 2026 11:39:25 -0400
Subject: [PATCH 07/11] add gitignore

---
 .gitignore | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 .gitignore

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..e69de29

From 73344fbbb4c898ec5a16a085a99cf810d0395745 Mon Sep 17 00:00:00 2001
From: mikemartinez99 <mike.j.martinez99@gmail.com>
Date: Tue, 21 Apr 2026 11:43:43 -0400
Subject: [PATCH 08/11] Make RustQC optional

---
 .gitignore                                    | 19 ++++
 Snakefile                                     | 99 +++++++++++--------
 config.yaml                                   | 24 +++--
 job.script.sh                                 |  6 +-
 .../human_config_paired_hisat.yaml            | 42 +++++---
 .../human_config_paired_star.yaml             | 46 ++++++---
 .../human_config_paired_star_rsem.yaml        | 46 ++++++---
 .../human_config_single_hisat.yaml            | 46 ++++++---
 .../human_config_single_star.yaml             | 46 ++++++---
 .../mouse_config_paired_hisat.yaml            | 47 ++++++---
 .../mouse_config_paired_star.yaml             | 47 ++++++---
 .../mouse_config_paired_star_rsem.yaml        | 47 ++++++---
 .../mouse_config_single_hisat.yaml            | 47 ++++++---
 .../mouse_config_single_star.yaml             | 47 ++++++---
 tests/test_config_paired_hisat.yaml           |  1 +
 tests/test_config_paired_star.yaml            |  1 +
 tests/test_config_paired_star_rsem.yaml       |  1 +
 tests/test_config_single_hisat.yaml           |  1 +
 tests/test_config_single_star.yaml            |  1 +
 19 files changed, 444 insertions(+), 170 deletions(-)

diff --git a/.gitignore b/.gitignore
index e69de29..4232ef6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,19 @@
+.cache/*
+.claude/*
+.fontconfig/*
+.snakemake/*
+.Rapp.history
+alignment/*
+featurecounts/*
+logs/*
+markdup/*
+metrics/*
+multiqc_report_data/*
+multiqc_report_plots/*
+plots/*
+qc/*
+ribodetector/*
+trimming/*
+slurm_logs/*
+*.log
+multiqc_report.html
diff --git a/Snakefile b/Snakefile
index 9abd951..aab731e 100644
--- a/Snakefile
+++ b/Snakefile
@@ -13,6 +13,8 @@ sample_list = list(samples_df['sample_id'])
 
 #----- Set additional rules based on params
 REMOVE_rRNA = config.get("remove_rRNA", False)
+RUN_RUSTQC = config.get("run_rustqc", False)
+
 if REMOVE_rRNA:
     R1_FASTQ_INPUT = "ribodetector/{sample}/{sample}.nonrrna.1.fq.gz" 
     R2_FASTQ_INPUT = "ribodetector/{sample}/{sample}.nonrrna.2.fq.gz" if config["layout"]=="paired" else None
@@ -49,7 +51,7 @@ rule all:
         "featurecounts/featurecounts.readcounts_rpkm.ann.tsv",
         "featurecounts/featurecounts.readcounts_fpkm.tsv",
         "featurecounts/featurecounts.readcounts_fpkm.ann.tsv",
-        expand("qc/{sample}", sample=sample_list)
+        expand("qc/{sample}", sample=sample_list) if RUN_RUSTQC else [expand("alignment/stats/{sample}.srt.bam.flagstat", sample=sample_list)]
     conda:
         "env_config/multiqc.yaml",
     resources: cpus="10", maxtime="2:00:00", mem_mb=60000,
@@ -58,23 +60,21 @@ rule all:
         multiqc=config["multiqc_path"],
         run_rsem=config["run_rsem"],
         aligner_name=config["aligner_name"],
+        multiqc_dirs=(
+            ("qc " if RUN_RUSTQC else "") +
+            "alignment markdup metrics featurecounts" +
+            (" ribodetector" if REMOVE_rRNA else "")
+        )
     output:
-        detailed_qc = "multiqc_report_detailed.html",
-        basic_qc = "multiqc_report_basic.html"
+        report = "multiqc_report.html",
     shell: """
         #-----multiqc fastqc alignment markdup metrics featurecounts
         {params.multiqc} \
             -c multiqc_config.yaml \
             -p \
-            qc alignment markdup metrics featurecounts ribodetector \
-            -n {output.detailed_qc}
+            {params.multiqc_dirs} \
+            -n {output.report}
         
-        {params.multiqc} \
-            -c multiqc_config.yaml \
-            -p \
-            qc/*/samtools qc/*/preseq alignment markdup metrics featurecounts ribodetector \
-            -n {output.basic_qc}
-
         #-----remove dummy R2 file (created to meet input rule requirements for rule all:)
         # also remove dummy rpkm and fpkm files from featurecounts normalization
         if [ "{params.layout}" = "single" ]
@@ -292,7 +292,7 @@ if config["aligner_name"]=="star":
 if config["aligner_name"]=="hisat":
   rule alignment:
     """
-    Hisat2 alignment
+    HISAT2 Alignment.
     """
       input: "trimming/{sample}.R1.trim.fastq.gz",
              "trimming/{sample}.R2.trim.fastq.gz" if config["layout"]=="paired" else [],
@@ -306,56 +306,47 @@ if config["aligner_name"]=="hisat":
           aligner_index = config["aligner_index"],
           samtools = config["samtools_path"],
           fastq_1_flag = '-1' if config['layout']=='paired' else '-U',
-          fastq_2 = '-2 trimming/{sample}.R2.trim.fastq.gz'  if config['layout']=='paired' else '',   
+          fastq_2 = '-2 trimming/{sample}.R2.trim.fastq.gz'  if config['layout']=='paired' else '',
       conda:
           "env_config/alignment.yaml",
       resources: cpus="4", maxtime="8:00:00", mem_mb=40000,
-      message: "aligning {wildcards.sample} reads with hisat2."
+      message: "aligning {wildcards.sample} reads with Hisat2."
       shell: """
           {params.hisat2} \
             -x {params.aligner_index} \
             --rg ID:{params.sample} \
             --rg SM:{params.sample} \
-            --rg LB:{params.sample}  \
-            {params.fastq_1_flag} \
-            trimming/{params.sample}.R1.trim.fastq.gz \
+            --rg LB:{params.sample} \
+            {params.fastq_1_flag} trimming/{params.sample}.R1.trim.fastq.gz \
             {params.fastq_2}  \
             -p {resources.cpus}  \
             --summary-file alignment/{params.sample}.hisat.summary.txt | \
             {params.samtools} view -@ {resources.cpus} -b | \
             {params.samtools} sort -T /scratch/samtools_{params.sample} -@ {resources.cpus} -m 128M - 1> alignment/{params.sample}.srt.bam
 
-        #----- generate BAM index
+        # generate BAM index
         {params.samtools} index -@ {resources.cpus} alignment/{params.sample}.srt.bam
 
      """
 
-rule rustqc:
+
+rule alignment_metrics:
     """
-    RNA-seq QC with RustQC (containerized)
+    Samtools metrics (when rustQC is false)
     """
-    input:
-        bam = "markdup/{sample}.mkdup.bam"
-    output:
-        qc_dir = directory("qc/{sample}")
-    container: "docker://ghcr.io/seqeralabs/rustqc:latest"
+    input: "alignment/{sample}.srt.bam",
+    output: "alignment/stats/{sample}.srt.bam.flagstat",
+            "alignment/stats/{sample}.srt.bam.idxstats",
     params:
-        gtf = config["annotation_gtf"],
-        paired_flag = '-p' if config['layout']=='paired' else ''
-    threads: 4
-    resources: cpus="4", maxtime="8:00:00", mem_mb=40000,
-    message: "Running comprehensive QC for {wildcards.sample} with RustQC."
-    log: "qc/{sample}/{sample}.log"
+        samtools = config["samtools_path"],
+        sample = lambda wildcards:  wildcards.sample,
+    conda:
+        "env_config/samtools.yaml",
+    resources: cpus="2", maxtime="8:00:00", mem_mb=20000,
+    message: "Running flagstats and idxstats QC for {wildcards.sample} with Samtools."
     shell: """
-    
-        rustqc rna \
-            {input.bam} \
-            --gtf {params.gtf} \
-            {params.paired_flag} \
-            -o {output.qc_dir} 2> {log} &&
-        
-        rm -r {output.qc_dir}/featurecounts
-
+        {params.samtools} flagstat alignment/{params.sample}.srt.bam > alignment/stats/{params.sample}.srt.bam.flagstat
+        {params.samtools} idxstats alignment/{params.sample}.srt.bam > alignment/stats/{params.sample}.srt.bam.idxstats
     """
 
 rule picard_markdup:
@@ -416,6 +407,34 @@ rule picard_collectmetrics:
             MAX_RECORDS_IN_RAM=1000000
 """
 
+rule rustqc:
+    """
+    RNA-seq QC with RustQC (containerized)
+    """
+    input:
+        bam = "markdup/{sample}.mkdup.bam"
+    output:
+        qc_dir = directory("qc/{sample}")
+    container: "docker://ghcr.io/seqeralabs/rustqc:v0.2.1"
+    params:
+        gtf = config["annotation_gtf"],
+        paired_flag = '-p' if config['layout']=='paired' else ''
+    threads: 4
+    resources: cpus="4", maxtime="8:00:00", mem_mb=40000
+    message: "Running comprehensive QC for {wildcards.sample} with RustQC."
+    log: "logs/rustqc/{sample}.log"
+    shell: """
+
+        rustqc rna \
+            {input.bam} \
+            --gtf {params.gtf} \
+            {params.paired_flag} \
+            --skip-dup-check \
+            -o {output.qc_dir} 2> {log}
+
+
+    """
+
 rule rsem:
     """
     Isoform counting
diff --git a/config.yaml b/config.yaml
index f5b1790..7ae4e62 100644
--- a/config.yaml
+++ b/config.yaml
@@ -7,8 +7,12 @@
 # this file must have the header sample_id<tab>fastq_1<tab>fastq_2.  (The second tab and fastq_2 are optional.)
 sample_csv: "sample_fastq_list_paired.csv"
 
+#############################################
+# Toggleable parameters                     #
+#############################################
+
 # library layout
-layout: "paired"
+layout: "paired" # 'single or paired'
 
 # aligner choice
 aligner_name: "hisat" #'hisat' or 'star'
@@ -16,10 +20,13 @@ aligner_name: "hisat" #'hisat' or 'star'
 # rsem choice
 run_rsem: "no"
 
-# Run rrna filtering?
+# Run Ribodetector for rRNA filtering
 remove_rRNA: true
 read_length: 51
 
+# Run RustQC (14 tool comprehensive QC in a single pass)
+run_rustqc: false
+
 #############################################
 #Strandedness settings - these should match #
 #############################################
@@ -27,15 +34,14 @@ featurecounts_strand: "2" # '1', '0'
 rsem_strandedness: "reverse" # 'foward', 'none'
 picard_strand: "SECOND_READ_TRANSCRIPTION_STRAND" # 'FIRST_READ_TRANSCRIPTION', 'NONE'
 
-
 #############################################
-# miscellaneous settings ####################
+# miscellaneous settings                    #
 #############################################
 cutadapt_nextseq_flag: "" # '--nextseq-trim-20'
 
 
 #############################################
-# Input reference fasta and GTF annotation ##
+# Input reference fasta and GTF annotation  #
 #############################################
 ###### 'sample/ref' contains a subset of hg38 that should be used only for pipeline testing.
 reference_fa: "sample_ref/hg38_chr567_100k.fa"
@@ -43,9 +49,9 @@ annotation_gtf: "sample_ref/hg38_chr567_100k.chr.gtf"
 
 
 #############################################
-# Aligner index and picard references #######
-# create and specify these or ###############
-# run snakemake build_ref rule to create ####
+# Aligner index and picard references       #
+# create and specify these or               #
+# run snakemake build_ref rule to create    #
 #############################################
 aligner_index: ''
 picard_refflat: ''
@@ -53,7 +59,7 @@ picard_rrna_list: ''
 rsem_ref_path: ''
 
 #############################################
-# Pipeline scripts ##########################
+# Pipeline scripts                          #
 #############################################
 picard_build_script: "scripts/picard_ref_builder.sh"
 featurecounts_rscript: "scripts/readcnt_to_rpkmtpm.py"
diff --git a/job.script.sh b/job.script.sh
index acbac46..75149b9 100644
--- a/job.script.sh
+++ b/job.script.sh
@@ -16,11 +16,13 @@ conda activate /dartfs/rc/nosnapshots/G/GMBSR_refs/envs/snakemake
 #----- Make slurm logs folder
 mkdir -p slurm_logs
 
+#! If using 'RUN_RUSTQC: True', include the following line in the snakemake call
+# --use-singularity \
+# --singularity-args "--bind /dartfs,/optnfs" \
+
 #----- Call Snakemake
 snakemake -s Snakefile \
 	--use-conda \
-	--use-singularity \
-	--singularity-args "--bind /dartfs,/optnfs" \
 	--conda-prefix /dartfs/rc/nosnapshots/G/GMBSR_refs/envs/DAC-RNAseq-pipeline \
 	--profile cluster_profile \
 	--rerun-incomplete \
diff --git a/prebuilt_configs/human_config_paired_hisat.yaml b/prebuilt_configs/human_config_paired_hisat.yaml
index d55dd0f..c82de0c 100644
--- a/prebuilt_configs/human_config_paired_hisat.yaml
+++ b/prebuilt_configs/human_config_paired_hisat.yaml
@@ -1,16 +1,34 @@
-###~~~~~~ general settings ~~~~~~###
+#############################################
+# General settings                          #
+#############################################
 
 # sample meta data file
 sample_csv: "sample_fastq_list_paired.csv"
 
-# sequencing config
+#############################################
+# Toggleable parameters                     #
+#############################################
+
+# library layout
 layout: "paired"
-#cutadapt_nextseq_flag: "--nextseq-trim=20"
-cutadapt_nextseq_flag: ""
 
+# aligner choice
+aligner_name: "hisat" #'hisat' or 'star'
+
+# rsem choice
+run_rsem: "no"
+
+# Run Ribodetector for rRNA filtering
 remove_rRNA: true
 read_length: 51
 
+# Run RustQC (14 tool comprehensive QC in a single pass)
+run_rustqc: false
+
+# sequencing config
+#cutadapt_nextseq_flag: "--nextseq-trim=20"
+cutadapt_nextseq_flag: ""
+
 
 #############################################
 # Software Paths for use with "--use-conda" #
@@ -24,9 +42,9 @@ rsem_calc_exp_path: "rsem-calculate-expression"
 rsem_prep_ref_path: "rsem-prepare-reference"
 multiqc_path: "multiqc"
 
-############################################# 
-# Custom Software Paths #####################
-############################################# 
+#############################################
+# Custom Software Paths                     #
+#############################################
 #cutadapt_path: "/dartfs-hpc/rc/home/d/d41294d/.conda/envs/rnaseq1/bin/cutadapt"
 #multiqc_path: "/dartfs-hpc/rc/home/d/d41294d/.conda/envs/rnaseq1/bin/multiqc"
 fastqc_path: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/misc/sullivan/tools/fastqc/FastQC/fastqc"
@@ -38,20 +56,22 @@ rsem_path: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/misc/owen/software/bin"
 #multiqc_path: "/dartfs-hpc/rc/home/d/d41294d/.conda/envs/rnaseq1/bin/multiqc"
 pca_plot_script: "scripts/pca_plotting.py"
 
-###~~~~~~ alignment settings ~~~~~~###
-aligner_name: "hisat" #'hisat' or 'star'
+#############################################
+# Alignment settings                        #
+#############################################
 #aligner_path: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/misc/sullivan/tools/STAR/STAR-2.7.1a/bin/Linux_x86_64/STAR"
 #aligner_index: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/genomic_references/human/STAR/hg38_index"
 #aligner_path: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/misc/sullivan/tools/hisat2/hisat2-2.1.0/hisat2"
 aligner_index: "sample_ref/HISAT_index/hg38_chr567_100k"
 
-###~~~~~~ quant settings ~~~~~~###
+#############################################
+# Quantification settings                   #
+#############################################
 #annotation_gtf: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/genomic_references/human/ensembl-annotations/Homo_sapiens.GRCh38.97.gtf"
 annotation_gtf: "sample_ref/hg38_chr567_100k.chr.gtf"
 featurecounts_strand: "2" #1 for first read transcription strand, 2 for second.
 featurecounts_rscript: "scripts/readcnt_to_rpkmtpm.py"
 featurecounts_annscript: "scripts/add_gene_to_ensg.py"
-run_rsem: "no"
 rsem_ref_path: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/genomic_references/human/RSEM/txdir/RSEMref"
 rsem_strandedness: "unstranded"
 
diff --git a/prebuilt_configs/human_config_paired_star.yaml b/prebuilt_configs/human_config_paired_star.yaml
index 20908b1..5ea95c2 100644
--- a/prebuilt_configs/human_config_paired_star.yaml
+++ b/prebuilt_configs/human_config_paired_star.yaml
@@ -1,16 +1,34 @@
-###~~~~~~ general settings ~~~~~~###
+#############################################
+# General settings                          #
+#############################################
 
 # sample meta data file
 sample_csv: "sample_fastq_list_paired.csv"
 
-# sequencing config
+#############################################
+# Toggleable parameters                     #
+#############################################
+
+# library layout
 layout: "paired"
-#cutadapt_nextseq_flag: "--nextseq-trim=20"
-cutadapt_nextseq_flag: ""
 
+# aligner choice
+aligner_name: "star" #'hisat' or 'star'
+
+# rsem choice
+run_rsem: "no"
+
+# Run Ribodetector for rRNA filtering
 remove_rRNA: true
 read_length: 51
 
+# Run RustQC (14 tool comprehensive QC in a single pass)
+run_rustqc: false
+
+# sequencing config
+#cutadapt_nextseq_flag: "--nextseq-trim=20"
+cutadapt_nextseq_flag: ""
+
 
 #############################################
 # Software Paths for use with "--use-conda" #
@@ -26,9 +44,9 @@ rsem_prep_ref_path: "rsem-prepare-reference"
 multiqc_path: "multiqc"
 pca_plot_script: "scripts/pca_plotting.py"
 
-############################################# 
-# Custom Software Paths #####################
-############################################# 
+#############################################
+# Custom Software Paths                     #
+#############################################
 #cutadapt_path: "/dartfs-hpc/rc/home/d/d41294d/.conda/envs/rnaseq1/bin/cutadapt"
 #multiqc_path: "/dartfs-hpc/rc/home/d/d41294d/.conda/envs/rnaseq1/bin/multiqc"
 fastqc_path: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/misc/sullivan/tools/fastqc/FastQC/fastqc"
@@ -39,25 +57,29 @@ java_path: "/usr/bin/java"
 rsem_path: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/misc/owen/software/bin"
 #multiqc_path: "/dartfs-hpc/rc/home/d/d41294d/.conda/envs/rnaseq1/bin/multiqc"
 
-###~~~~~~ alignment settings ~~~~~~###
-aligner_name: "star" #'hisat' or 'star'
+#############################################
+# Alignment settings                        #
+#############################################
 #aligner_path: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/misc/sullivan/tools/STAR/STAR-2.7.1a/bin/Linux_x86_64/STAR"
 #aligner_index: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/genomic_references/human/STAR/hg38_index"
 #aligner_path: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/misc/sullivan/tools/hisat2/hisat2-2.1.0/hisat2"
 aligner_index: "sample_ref/hg38_chr567_100k"
 
-###~~~~~~ quant settings ~~~~~~###
+#############################################
+# Quantification settings                   #
+#############################################
 #annotation_gtf: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/genomic_references/human/ensembl-annotations/Homo_sapiens.GRCh38.97.gtf"
 annotation_gtf: "sample_ref/hg38_chr567_100k.chr.gtf"
 featurecounts_strand: "2" #1 for first read transcription strand, 2 for second.
 # featurecounts_rscript: "scripts/fc_to_rpkmtpm.R"
 featurecounts_rscript: "scripts/readcnt_to_rpkmtpm.py"
 featurecounts_annscript: "scripts/add_gene_to_ensg.py"
-run_rsem: "no"
 #rsem_ref_path: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/genomic_references/human/RSEM/txdir/RSEMref"
 rsem_strandedness: "unstranded"
 
-###~~~~~~ other ~~~~~~###
+#############################################
+# Other settings                            #
+#############################################
 picard_rrna_list: "sample_ref/hg38_chr567_100k_rRNA.interval.list"
 #picard_rrna_list: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/genomic_references/human/CollectRnaSeqMetrics/Homo_sapiens.GRCh38.97.rRNA.interval_list"
 #picard_refflat: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/genomic_references/human/CollectRnaSeqMetrics/Homo_sapiens.GRCh38.97.refFlat.txt"
diff --git a/prebuilt_configs/human_config_paired_star_rsem.yaml b/prebuilt_configs/human_config_paired_star_rsem.yaml
index d070638..fc7f673 100644
--- a/prebuilt_configs/human_config_paired_star_rsem.yaml
+++ b/prebuilt_configs/human_config_paired_star_rsem.yaml
@@ -1,16 +1,34 @@
-###~~~~~~ general settings ~~~~~~###
+#############################################
+# General settings                          #
+#############################################
 
 # sample meta data file
 sample_csv: "sample_fastq_list_paired.csv"
 
-# sequencing config
+#############################################
+# Toggleable parameters                     #
+#############################################
+
+# library layout
 layout: "paired"
-#cutadapt_nextseq_flag: "--nextseq-trim=20"
-cutadapt_nextseq_flag: ""
 
+# aligner choice
+aligner_name: "star" #'hisat' or 'star'
+
+# rsem choice
+run_rsem: "yes"
+
+# Run Ribodetector for rRNA filtering
 remove_rRNA: true
 read_length: 51
 
+# Run RustQC (14 tool comprehensive QC in a single pass)
+run_rustqc: false
+
+# sequencing config
+#cutadapt_nextseq_flag: "--nextseq-trim=20"
+cutadapt_nextseq_flag: ""
+
 
 #############################################
 # Software Paths for use with "--use-conda" #
@@ -26,9 +44,9 @@ rsem_prep_ref_path: "rsem-prepare-reference"
 multiqc_path: "multiqc"
 pca_plot_script: "scripts/pca_plotting.py"
 
-############################################# 
-# Custom Software Paths #####################
-############################################# 
+#############################################
+# Custom Software Paths                     #
+#############################################
 #cutadapt_path: "/dartfs-hpc/rc/home/d/d41294d/.conda/envs/rnaseq1/bin/cutadapt"
 #multiqc_path: "/dartfs-hpc/rc/home/d/d41294d/.conda/envs/rnaseq1/bin/multiqc"
 fastqc_path: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/misc/sullivan/tools/fastqc/FastQC/fastqc"
@@ -39,26 +57,30 @@ java_path: "/usr/bin/java"
 #rsem_path: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/misc/owen/software/bin"
 #multiqc_path: "/dartfs-hpc/rc/home/d/d41294d/.conda/envs/rnaseq1/bin/multiqc"
 
-###~~~~~~ alignment settings ~~~~~~###
-aligner_name: "star" #'hisat' or 'star'
+#############################################
+# Alignment settings                        #
+#############################################
 #aligner_path: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/misc/sullivan/tools/STAR/STAR-2.7.1a/bin/Linux_x86_64/STAR"
 #aligner_index: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/genomic_references/human/STAR/hg38_index"
 #aligner_path: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/misc/sullivan/tools/hisat2/hisat2-2.1.0/hisat2"
 aligner_index: "sample_ref/hg38_chr567_100k"
 
-###~~~~~~ quant settings ~~~~~~###
+#############################################
+# Quantification settings                   #
+#############################################
 #annotation_gtf: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/genomic_references/human/ensembl-annotations/Homo_sapiens.GRCh38.97.gtf"
 annotation_gtf: "sample_ref/hg38_chr567_100k.chr.gtf"
 featurecounts_strand: "2" #1 for first read transcription strand, 2 for second.
 # featurecounts_rscript: "scripts/fc_to_rpkmtpm.R"
 featurecounts_rscript: "scripts/readcnt_to_rpkmtpm.py"
 featurecounts_annscript: "scripts/add_gene_to_ensg.py"
-run_rsem: "yes"
 #rsem_ref_path: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/genomic_references/human/RSEM/txdir/RSEMref"
 rsem_ref_path: "sample_ref/RSEM_ref/ref"
 rsem_strandedness: "reverse"
 
-###~~~~~~ other ~~~~~~###
+#############################################
+# Other settings                            #
+#############################################
 picard_rrna_list: "sample_ref/hg38_chr567_100k_rRNA.interval.list"
 #picard_rrna_list: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/genomic_references/human/CollectRnaSeqMetrics/Homo_sapiens.GRCh38.97.rRNA.interval_list"
 #picard_refflat: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/genomic_references/human/CollectRnaSeqMetrics/Homo_sapiens.GRCh38.97.refFlat.txt"
diff --git a/prebuilt_configs/human_config_single_hisat.yaml b/prebuilt_configs/human_config_single_hisat.yaml
index aa7c6b7..0ca8bc2 100644
--- a/prebuilt_configs/human_config_single_hisat.yaml
+++ b/prebuilt_configs/human_config_single_hisat.yaml
@@ -1,16 +1,34 @@
-###~~~~~~ general settings ~~~~~~###
+#############################################
+# General settings                          #
+#############################################
 
 # sample meta data file
 sample_csv: "sample_fastq_list_single.csv"
 
-# sequencing config
+#############################################
+# Toggleable parameters                     #
+#############################################
+
+# library layout
 layout: "single"
-#cutadapt_nextseq_flag: "--nextseq-trim=20"
-cutadapt_nextseq_flag: ""
 
+# aligner choice
+aligner_name: "hisat" #'hisat' or 'star'
+
+# rsem choice
+run_rsem: "no"
+
+# Run Ribodetector for rRNA filtering
 remove_rRNA: true
 read_length: 51
 
+# Run RustQC (14 tool comprehensive QC in a single pass)
+run_rustqc: false
+
+# sequencing config
+#cutadapt_nextseq_flag: "--nextseq-trim=20"
+cutadapt_nextseq_flag: ""
+
 
 #############################################
 # Software Paths for use with "--use-conda" #
@@ -25,9 +43,9 @@ rsem_prep_ref_path: "rsem-prepare-reference"
 multiqc_path: "multiqc"
 pca_plot_script: "scripts/pca_plotting.py"
 
-############################################# 
-# Custom Software Paths #####################
-############################################# 
+#############################################
+# Custom Software Paths                     #
+#############################################
 #cutadapt_path: "/dartfs-hpc/rc/home/d/d41294d/.conda/envs/rnaseq1/bin/cutadapt"
 #multiqc_path: "/dartfs-hpc/rc/home/d/d41294d/.conda/envs/rnaseq1/bin/multiqc"
 fastqc_path: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/misc/sullivan/tools/fastqc/FastQC/fastqc"
@@ -38,25 +56,29 @@ java_path: "/usr/bin/java"
 rsem_path: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/misc/owen/software/bin"
 #multiqc_path: "/dartfs-hpc/rc/home/d/d41294d/.conda/envs/rnaseq1/bin/multiqc"
 
-###~~~~~~ alignment settings ~~~~~~###
-aligner_name: "hisat" #'hisat' or 'star'
+#############################################
+# Alignment settings                        #
+#############################################
 #aligner_path: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/misc/sullivan/tools/STAR/STAR-2.7.1a/bin/Linux_x86_64/STAR"
 #aligner_index: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/genomic_references/human/STAR/hg38_index"
 #aligner_path: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/misc/sullivan/tools/hisat2/hisat2-2.1.0/hisat2"
 aligner_index: "sample_ref/HISAT_index/hg38_chr567_100k"
 
-###~~~~~~ quant settings ~~~~~~###
+#############################################
+# Quantification settings                   #
+#############################################
 #annotation_gtf: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/genomic_references/human/ensembl-annotations/Homo_sapiens.GRCh38.97.gtf"
 annotation_gtf: "sample_ref/hg38_chr567_100k.chr.gtf"
 featurecounts_strand: "1" #1 for first read transcription strand, 2 for second.
 # featurecounts_rscript: "scripts/fc_to_rpkmtpm.R"
 featurecounts_rscript: "scripts/readcnt_to_rpkmtpm.py"
 featurecounts_annscript: "scripts/add_gene_to_ensg.py"
-run_rsem: "no"
 rsem_ref_path: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/genomic_references/human/RSEM/txdir/RSEMref"
 rsem_strandedness: "unstranded"
 
-###~~~~~~ other ~~~~~~###
+#############################################
+# Other settings                            #
+#############################################
 picard_rrna_list: "sample_ref/hg38_chr567_100k_rRNA.interval.list"
 #picard_rrna_list: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/genomic_references/human/CollectRnaSeqMetrics/Homo_sapiens.GRCh38.97.rRNA.interval_list"
 #picard_refflat: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/genomic_references/human/CollectRnaSeqMetrics/Homo_sapiens.GRCh38.97.refFlat.txt"
diff --git a/prebuilt_configs/human_config_single_star.yaml b/prebuilt_configs/human_config_single_star.yaml
index 9f9f6c7..0ab8676 100644
--- a/prebuilt_configs/human_config_single_star.yaml
+++ b/prebuilt_configs/human_config_single_star.yaml
@@ -1,16 +1,34 @@
-###~~~~~~ general settings ~~~~~~###
+#############################################
+# General settings                          #
+#############################################
 
 # sample meta data file
 sample_csv: "sample_fastq_list_single.csv"
 
-# sequencing config
+#############################################
+# Toggleable parameters                     #
+#############################################
+
+# library layout
 layout: "single"
-#cutadapt_nextseq_flag: "--nextseq-trim=20"
-cutadapt_nextseq_flag: ""
 
+# aligner choice
+aligner_name: "star" #'hisat' or 'star'
+
+# rsem choice
+run_rsem: "no"
+
+# Run Ribodetector for rRNA filtering
 remove_rRNA: true
 read_length: 51
 
+# Run RustQC (14 tool comprehensive QC in a single pass)
+run_rustqc: false
+
+# sequencing config
+#cutadapt_nextseq_flag: "--nextseq-trim=20"
+cutadapt_nextseq_flag: ""
+
 
 #############################################
 # Software Paths for use with "--use-conda" #
@@ -25,9 +43,9 @@ rsem_prep_ref_path: "rsem-prepare-reference"
 multiqc_path: "multiqc"
 pca_plot_script: "scripts/pca_plotting.py"
 
-############################################# 
-# Custom Software Paths #####################
-############################################# 
+#############################################
+# Custom Software Paths                     #
+#############################################
 #cutadapt_path: "/dartfs-hpc/rc/home/d/d41294d/.conda/envs/rnaseq1/bin/cutadapt"
 #multiqc_path: "/dartfs-hpc/rc/home/d/d41294d/.conda/envs/rnaseq1/bin/multiqc"
 fastqc_path: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/misc/sullivan/tools/fastqc/FastQC/fastqc"
@@ -37,25 +55,29 @@ java_path: "/usr/bin/java"
 #featurecounts_path: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/misc/sullivan/tools/featurecounts/subread-2.0.1-Linux-x86_64/bin/featureCounts"
 rsem_path: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/misc/owen/software/bin"
 
-###~~~~~~ alignment settings ~~~~~~###
-aligner_name: "star" #'hisat' or 'star'
+#############################################
+# Alignment settings                        #
+#############################################
 #aligner_path: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/misc/sullivan/tools/STAR/STAR-2.7.1a/bin/Linux_x86_64/STAR"
 #aligner_index: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/genomic_references/human/STAR/hg38_index"
 #aligner_path: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/misc/sullivan/tools/hisat2/hisat2-2.1.0/hisat2"
 aligner_index: "sample_ref/hg38_chr567_100k"
 
-###~~~~~~ quant settings ~~~~~~###
+#############################################
+# Quantification settings                   #
+#############################################
 #annotation_gtf: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/genomic_references/human/ensembl-annotations/Homo_sapiens.GRCh38.97.gtf"
 annotation_gtf: "sample_ref/hg38_chr567_100k.chr.gtf"
 featurecounts_strand: "1" #1 for first read transcription strand, 2 for second.
 # featurecounts_rscript: "scripts/fc_to_rpkmtpm.R"
 featurecounts_rscript: "scripts/readcnt_to_rpkmtpm.py"
 featurecounts_annscript: "scripts/add_gene_to_ensg.py"
-run_rsem: "no"
 #rsem_ref_path: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/genomic_references/human/RSEM/txdir/RSEMref"
 rsem_strandedness: "unstranded"
 
-###~~~~~~ other ~~~~~~###
+#############################################
+# Other settings                            #
+#############################################
 picard_rrna_list: "sample_ref/hg38_chr567_100k_rRNA.interval.list"
 #picard_rrna_list: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/genomic_references/human/CollectRnaSeqMetrics/Homo_sapiens.GRCh38.97.rRNA.interval_list"
 #picard_refflat: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/genomic_references/human/CollectRnaSeqMetrics/Homo_sapiens.GRCh38.97.refFlat.txt"
diff --git a/prebuilt_configs/mouse_config_paired_hisat.yaml b/prebuilt_configs/mouse_config_paired_hisat.yaml
index ec3b9a6..c460856 100644
--- a/prebuilt_configs/mouse_config_paired_hisat.yaml
+++ b/prebuilt_configs/mouse_config_paired_hisat.yaml
@@ -1,16 +1,35 @@
-###~~~~~~ general settings ~~~~~~###
+#############################################
+# General settings                          #
+#############################################
 
 # sample meta data file
 sample_csv: "sample_fastq_list_paired.csv"
 
-# sequencing config
+#############################################
+# Toggleable parameters                     #
+#############################################
+
+# library layout
 layout: "paired"
-#cutadapt_nextseq_flag: "--nextseq-trim=20"
-cutadapt_nextseq_flag: ""
 
+# aligner choice
+aligner_name: "hisat" #'hisat' or 'star'
+
+# rsem choice
+run_rsem: "no"
+
+# Run Ribodetector for rRNA filtering
 remove_rRNA: true
 read_length: 51
 
+# Run RustQC (14 tool comprehensive QC in a single pass)
+run_rustqc: false
+
+# sequencing config
+#cutadapt_nextseq_flag: "--nextseq-trim=20"
+cutadapt_nextseq_flag: ""
+
+
 #############################################
 # Software Paths for use with "--use-conda" #
 ############################################# 
@@ -24,9 +43,9 @@ rsem_prep_ref_path: "rsem-prepare-reference"
 multiqc_path: "multiqc"
 pca_plot_script: "scripts/pca_plotting.py"
 
-############################################# 
-# Custom Software Paths #####################
-############################################# 
+#############################################
+# Custom Software Paths                     #
+#############################################
 #cutadapt_path: "/dartfs-hpc/rc/home/d/d41294d/.conda/envs/rnaseq1/bin/cutadapt"
 #multiqc_path: "/dartfs-hpc/rc/home/d/d41294d/.conda/envs/rnaseq1/bin/multiqc"
 fastqc_path: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/misc/sullivan/tools/fastqc/FastQC/fastqc"
@@ -37,25 +56,29 @@ java_path: "/usr/bin/java"
 rsem_path: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/misc/owen/software/bin"
 #multiqc_path: "/dartfs-hpc/rc/home/d/d41294d/.conda/envs/rnaseq1/bin/multiqc"
 
-###~~~~~~ alignment settings ~~~~~~###
-aligner_name: "hisat" #'hisat' or 'star'
+#############################################
+# Alignment settings                        #
+#############################################
 #aligner_path: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/misc/sullivan/tools/STAR/STAR-2.7.1a/bin/Linux_x86_64/STAR"
 #aligner_index: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/genomic_references/human/STAR/hg38_index"
 #aligner_path: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/misc/sullivan/tools/hisat2/hisat2-2.1.0/hisat2"
 aligner_index: "sample_ref/HISAT_index/hg38_chr567_100k"
 
-###~~~~~~ quant settings ~~~~~~###
+#############################################
+# Quantification settings                   #
+#############################################
 #annotation_gtf: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/genomic_references/human/ensembl-annotations/Homo_sapiens.GRCh38.97.gtf"
 annotation_gtf: "sample_ref/hg38_chr567_100k.chr.gtf"
 featurecounts_strand: "2" #1 for first read transcription strand, 2 for second.
 # featurecounts_rscript: "scripts/fc_to_rpkmtpm.R"
 featurecounts_rscript: "scripts/readcnt_to_rpkmtpm.py"
 featurecounts_annscript: "scripts/add_gene_to_ensg.py"
-run_rsem: "no"
 rsem_ref_path: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/genomic_references/human/RSEM/txdir/RSEMref"
 rsem_strandedness: "unstranded"
 
-###~~~~~~ other ~~~~~~###
+#############################################
+# Other settings                            #
+#############################################
 picard_rrna_list: "sample_ref/hg38_chr567_100k_rRNA.interval.list"
 #picard_rrna_list: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/genomic_references/human/CollectRnaSeqMetrics/Homo_sapiens.GRCh38.97.rRNA.interval_list"
 #picard_refflat: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/genomic_references/human/CollectRnaSeqMetrics/Homo_sapiens.GRCh38.97.refFlat.txt"
diff --git a/prebuilt_configs/mouse_config_paired_star.yaml b/prebuilt_configs/mouse_config_paired_star.yaml
index 772a54b..650dfdd 100644
--- a/prebuilt_configs/mouse_config_paired_star.yaml
+++ b/prebuilt_configs/mouse_config_paired_star.yaml
@@ -1,16 +1,35 @@
-###~~~~~~ general settings ~~~~~~###
+#############################################
+# General settings                          #
+#############################################
 
 # sample meta data file
 sample_csv: "sample_fastq_list_paired.csv"
 
-# sequencing config
+#############################################
+# Toggleable parameters                     #
+#############################################
+
+# library layout
 layout: "paired"
-#cutadapt_nextseq_flag: "--nextseq-trim=20"
-cutadapt_nextseq_flag: ""
 
+# aligner choice
+aligner_name: "star" #'hisat' or 'star'
+
+# rsem choice
+run_rsem: "no"
+
+# Run Ribodetector for rRNA filtering
 remove_rRNA: true
 read_length: 51
 
+# Run RustQC (14 tool comprehensive QC in a single pass)
+run_rustqc: false
+
+# sequencing config
+#cutadapt_nextseq_flag: "--nextseq-trim=20"
+cutadapt_nextseq_flag: ""
+
+
 #############################################
 # Software Paths for use with "--use-conda" #
 ############################################# 
@@ -25,9 +44,9 @@ rsem_prep_ref_path: "rsem-prepare-reference"
 multiqc_path: "multiqc"
 pca_plot_script: "scripts/pca_plotting.py"
 
-############################################# 
-# Custom Software Paths #####################
-############################################# 
+#############################################
+# Custom Software Paths                     #
+#############################################
 #cutadapt_path: "/dartfs-hpc/rc/home/d/d41294d/.conda/envs/rnaseq1/bin/cutadapt"
 #multiqc_path: "/dartfs-hpc/rc/home/d/d41294d/.conda/envs/rnaseq1/bin/multiqc"
 fastqc_path: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/misc/sullivan/tools/fastqc/FastQC/fastqc"
@@ -38,25 +57,29 @@ java_path: "/usr/bin/java"
 rsem_path: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/misc/owen/software/bin"
 #multiqc_path: "/dartfs-hpc/rc/home/d/d41294d/.conda/envs/rnaseq1/bin/multiqc"
 
-###~~~~~~ alignment settings ~~~~~~###
-aligner_name: "star" #'hisat' or 'star'
+#############################################
+# Alignment settings                        #
+#############################################
 #aligner_path: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/misc/sullivan/tools/STAR/STAR-2.7.1a/bin/Linux_x86_64/STAR"
 #aligner_index: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/genomic_references/human/STAR/hg38_index"
 #aligner_path: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/misc/sullivan/tools/hisat2/hisat2-2.1.0/hisat2"
 aligner_index: "sample_ref/hg38_chr567_100k"
 
-###~~~~~~ quant settings ~~~~~~###
+#############################################
+# Quantification settings                   #
+#############################################
 #annotation_gtf: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/genomic_references/human/ensembl-annotations/Homo_sapiens.GRCh38.97.gtf"
 annotation_gtf: "sample_ref/hg38_chr567_100k.chr.gtf"
 featurecounts_strand: "2" #1 for first read transcription strand, 2 for second.
 # featurecounts_rscript: "scripts/fc_to_rpkmtpm.R"
 featurecounts_rscript: "scripts/readcnt_to_rpkmtpm.py"
 featurecounts_annscript: "scripts/add_gene_to_ensg.py"
-run_rsem: "no"
 #rsem_ref_path: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/genomic_references/human/RSEM/txdir/RSEMref"
 rsem_strandedness: "unstranded"
 
-###~~~~~~ other ~~~~~~###
+#############################################
+# Other settings                            #
+#############################################
 picard_rrna_list: "sample_ref/hg38_chr567_100k_rRNA.interval.list"
 #picard_rrna_list: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/genomic_references/human/CollectRnaSeqMetrics/Homo_sapiens.GRCh38.97.rRNA.interval_list"
 #picard_refflat: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/genomic_references/human/CollectRnaSeqMetrics/Homo_sapiens.GRCh38.97.refFlat.txt"
diff --git a/prebuilt_configs/mouse_config_paired_star_rsem.yaml b/prebuilt_configs/mouse_config_paired_star_rsem.yaml
index 6214610..5bf0269 100644
--- a/prebuilt_configs/mouse_config_paired_star_rsem.yaml
+++ b/prebuilt_configs/mouse_config_paired_star_rsem.yaml
@@ -1,16 +1,35 @@
-###~~~~~~ general settings ~~~~~~###
+#############################################
+# General settings                          #
+#############################################
 
 # sample meta data file
 sample_csv: "sample_fastq_list_paired.cdv"
 
-# sequencing config
+#############################################
+# Toggleable parameters                     #
+#############################################
+
+# library layout
 layout: "paired"
-#cutadapt_nextseq_flag: "--nextseq-trim=20"
-cutadapt_nextseq_flag: ""
 
+# aligner choice
+aligner_name: "star" #'hisat' or 'star'
+
+# rsem choice
+run_rsem: "yes"
+
+# Run Ribodetector for rRNA filtering
 remove_rRNA: true
 read_length: 51
 
+# Run RustQC (14 tool comprehensive QC in a single pass)
+run_rustqc: false
+
+# sequencing config
+#cutadapt_nextseq_flag: "--nextseq-trim=20"
+cutadapt_nextseq_flag: ""
+
+
 #############################################
 # Software Paths for use with "--use-conda" #
 ############################################# 
@@ -25,32 +44,36 @@ rsem_prep_ref_path: "rsem-prepare-reference"
 multiqc_path: "multiqc"
 pca_plot_script: "scripts/pca_plotting.py"
 
-############################################# 
-# Custom Software Paths #####################
-############################################# 
+#############################################
+# Custom Software Paths                     #
+#############################################
 fastqc_path: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/misc/sullivan/tools/fastqc/FastQC/fastqc"
 java_path: "/usr/bin/java"
 
-###~~~~~~ alignment settings ~~~~~~###
-aligner_name: "star" #'hisat' or 'star'
+#############################################
+# Alignment settings                        #
+#############################################
 #aligner_path: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/misc/sullivan/tools/STAR/STAR-2.7.1a/bin/Linux_x86_64/STAR"
 #aligner_index: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/genomic_references/human/STAR/hg38_index"
 #aligner_path: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/misc/sullivan/tools/hisat2/hisat2-2.1.0/hisat2"
 aligner_index: "sample_ref/hg38_chr567_100k"
 
-###~~~~~~ quant settings ~~~~~~###
+#############################################
+# Quantification settings                   #
+#############################################
 #annotation_gtf: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/genomic_references/human/ensembl-annotations/Homo_sapiens.GRCh38.97.gtf"
 annotation_gtf: "sample_ref/hg38_chr567_100k.chr.gtf"
 featurecounts_strand: "2" #1 for first read transcription strand, 2 for second.
 # featurecounts_rscript: "scripts/fc_to_rpkmtpm.R"
 featurecounts_rscript: "scripts/readcnt_to_rpkmtpm.py"
 featurecounts_annscript: "scripts/add_gene_to_ensg.py"
-run_rsem: "yes"
 #rsem_ref_path: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/genomic_references/human/RSEM/txdir/RSEMref"
 rsem_ref_path: "sample_ref/RSEM_ref/ref"
 rsem_strandedness: "reverse"
 
-###~~~~~~ other ~~~~~~###
+#############################################
+# Other settings                            #
+#############################################
 picard_rrna_list: "sample_ref/hg38_chr567_100k_rRNA.interval.list"
 #picard_rrna_list: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/genomic_references/human/CollectRnaSeqMetrics/Homo_sapiens.GRCh38.97.rRNA.interval_list"
 #picard_refflat: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/genomic_references/human/CollectRnaSeqMetrics/Homo_sapiens.GRCh38.97.refFlat.txt"
diff --git a/prebuilt_configs/mouse_config_single_hisat.yaml b/prebuilt_configs/mouse_config_single_hisat.yaml
index 870bf3c..5c06adb 100644
--- a/prebuilt_configs/mouse_config_single_hisat.yaml
+++ b/prebuilt_configs/mouse_config_single_hisat.yaml
@@ -1,16 +1,35 @@
-###~~~~~~ general settings ~~~~~~###
+#############################################
+# General settings                          #
+#############################################
 
 # sample meta data file
 sample_csv: "sample_fastq_list_single.csv"
 
-# sequencing config
+#############################################
+# Toggleable parameters                     #
+#############################################
+
+# library layout
 layout: "single"
-#cutadapt_nextseq_flag: "--nextseq-trim=20"
-cutadapt_nextseq_flag: ""
 
+# aligner choice
+aligner_name: "hisat" #'hisat' or 'star'
+
+# rsem choice
+run_rsem: "no"
+
+# Run Ribodetector for rRNA filtering
 remove_rRNA: true
 read_length: 51
 
+# Run RustQC (14 tool comprehensive QC in a single pass)
+run_rustqc: false
+
+# sequencing config
+#cutadapt_nextseq_flag: "--nextseq-trim=20"
+cutadapt_nextseq_flag: ""
+
+
 #############################################
 # Software Paths for use with "--use-conda" #
 ############################################# 
@@ -24,9 +43,9 @@ rsem_prep_ref_path: "rsem-prepare-reference"
 multiqc_path: "multiqc"
 pca_plot_script: "scripts/pca_plotting.py"
 
-############################################# 
-# Custom Software Paths #####################
-############################################# 
+#############################################
+# Custom Software Paths                     #
+#############################################
 #cutadapt_path: "/dartfs-hpc/rc/home/d/d41294d/.conda/envs/rnaseq1/bin/cutadapt"
 #multiqc_path: "/dartfs-hpc/rc/home/d/d41294d/.conda/envs/rnaseq1/bin/multiqc"
 fastqc_path: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/misc/sullivan/tools/fastqc/FastQC/fastqc"
@@ -37,25 +56,29 @@ java_path: "/usr/bin/java"
 rsem_path: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/misc/owen/software/bin"
 #multiqc_path: "/dartfs-hpc/rc/home/d/d41294d/.conda/envs/rnaseq1/bin/multiqc"
 
-###~~~~~~ alignment settings ~~~~~~###
-aligner_name: "hisat" #'hisat' or 'star'
+#############################################
+# Alignment settings                        #
+#############################################
 #aligner_path: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/misc/sullivan/tools/STAR/STAR-2.7.1a/bin/Linux_x86_64/STAR"
 #aligner_index: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/genomic_references/human/STAR/hg38_index"
 #aligner_path: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/misc/sullivan/tools/hisat2/hisat2-2.1.0/hisat2"
 aligner_index: "sample_ref/HISAT_index/hg38_chr567_100k"
 
-###~~~~~~ quant settings ~~~~~~###
+#############################################
+# Quantification settings                   #
+#############################################
 #annotation_gtf: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/genomic_references/human/ensembl-annotations/Homo_sapiens.GRCh38.97.gtf"
 annotation_gtf: "sample_ref/hg38_chr567_100k.chr.gtf"
 featurecounts_strand: "1" #1 for first read transcription strand, 2 for second.
 # featurecounts_rscript: "scripts/fc_to_rpkmtpm.R"
 featurecounts_rscript: "scripts/readcnt_to_rpkmtpm.py"
 featurecounts_annscript: "scripts/add_gene_to_ensg.py"
-run_rsem: "no"
 rsem_ref_path: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/genomic_references/human/RSEM/txdir/RSEMref"
 rsem_strandedness: "unstranded"
 
-###~~~~~~ other ~~~~~~###
+#############################################
+# Other settings                            #
+#############################################
 picard_rrna_list: "sample_ref/hg38_chr567_100k_rRNA.interval.list"
 #picard_rrna_list: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/genomic_references/human/CollectRnaSeqMetrics/Homo_sapiens.GRCh38.97.rRNA.interval_list"
 #picard_refflat: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/genomic_references/human/CollectRnaSeqMetrics/Homo_sapiens.GRCh38.97.refFlat.txt"
diff --git a/prebuilt_configs/mouse_config_single_star.yaml b/prebuilt_configs/mouse_config_single_star.yaml
index 0c62dac..4170745 100644
--- a/prebuilt_configs/mouse_config_single_star.yaml
+++ b/prebuilt_configs/mouse_config_single_star.yaml
@@ -1,16 +1,35 @@
-###~~~~~~ general settings ~~~~~~###
+#############################################
+# General settings                          #
+#############################################
 
 # sample meta data file
 sample_csv: "sample_fastq_list_single.csv"
 
-# sequencing config
+#############################################
+# Toggleable parameters                     #
+#############################################
+
+# library layout
 layout: "single"
-#cutadapt_nextseq_flag: "--nextseq-trim=20"
-cutadapt_nextseq_flag: ""
 
+# aligner choice
+aligner_name: "star" #'hisat' or 'star'
+
+# rsem choice
+run_rsem: "no"
+
+# Run Ribodetector for rRNA filtering
 remove_rRNA: true
 read_length: 51
 
+# Run RustQC (14 tool comprehensive QC in a single pass)
+run_rustqc: false
+
+# sequencing config
+#cutadapt_nextseq_flag: "--nextseq-trim=20"
+cutadapt_nextseq_flag: ""
+
+
 #############################################
 # Software Paths for use with "--use-conda" #
 ############################################# 
@@ -25,9 +44,9 @@ rsem_prep_ref_path: "rsem-prepare-reference"
 multiqc_path: "multiqc"
 pca_plot_script: "scripts/pca_plotting.py"
 
-############################################# 
-# Custom Software Paths #####################
-############################################# 
+#############################################
+# Custom Software Paths                     #
+#############################################
 #cutadapt_path: "/dartfs-hpc/rc/home/d/d41294d/.conda/envs/rnaseq1/bin/cutadapt"
 #multiqc_path: "/dartfs-hpc/rc/home/d/d41294d/.conda/envs/rnaseq1/bin/multiqc"
 fastqc_path: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/misc/sullivan/tools/fastqc/FastQC/fastqc"
@@ -38,25 +57,29 @@ java_path: "/usr/bin/java"
 rsem_path: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/misc/owen/software/bin"
 #multiqc_path: "/dartfs-hpc/rc/home/d/d41294d/.conda/envs/rnaseq1/bin/multiqc"
 
-###~~~~~~ alignment settings ~~~~~~###
-aligner_name: "star" #'hisat' or 'star'
+#############################################
+# Alignment settings                        #
+#############################################
 #aligner_path: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/misc/sullivan/tools/STAR/STAR-2.7.1a/bin/Linux_x86_64/STAR"
 #aligner_index: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/genomic_references/human/STAR/hg38_index"
 #aligner_path: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/misc/sullivan/tools/hisat2/hisat2-2.1.0/hisat2"
 aligner_index: "sample_ref/hg38_chr567_100k"
 
-###~~~~~~ quant settings ~~~~~~###
+#############################################
+# Quantification settings                   #
+#############################################
 #annotation_gtf: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/genomic_references/human/ensembl-annotations/Homo_sapiens.GRCh38.97.gtf"
 annotation_gtf: "sample_ref/hg38_chr567_100k.chr.gtf"
 featurecounts_strand: "1" #1 for first read transcription strand, 2 for second.
 # featurecounts_rscript: "scripts/fc_to_rpkmtpm.R"
 featurecounts_rscript: "scripts/readcnt_to_rpkmtpm.py"
 featurecounts_annscript: "scripts/add_gene_to_ensg.py"
-run_rsem: "no"
 #rsem_ref_path: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/genomic_references/human/RSEM/txdir/RSEMref"
 rsem_strandedness: "unstranded"
 
-###~~~~~~ other ~~~~~~###
+#############################################
+# Other settings                            #
+#############################################
 picard_rrna_list: "sample_ref/hg38_chr567_100k_rRNA.interval.list"
 #picard_rrna_list: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/genomic_references/human/CollectRnaSeqMetrics/Homo_sapiens.GRCh38.97.rRNA.interval_list"
 #picard_refflat: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/genomic_references/human/CollectRnaSeqMetrics/Homo_sapiens.GRCh38.97.refFlat.txt"
diff --git a/tests/test_config_paired_hisat.yaml b/tests/test_config_paired_hisat.yaml
index 918c93b..a801ca0 100644
--- a/tests/test_config_paired_hisat.yaml
+++ b/tests/test_config_paired_hisat.yaml
@@ -49,6 +49,7 @@ featurecounts_strand: "2" #1 for first read transcription strand, 2 for second.
 featurecounts_rscript: "scripts/readcnt_to_rpkmtpm.py"
 featurecounts_annscript: "scripts/add_gene_to_ensg.py"
 run_rsem: "no"
+run_rustqc: false
 rsem_ref_path: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/genomic_references/human/RSEM/txdir/RSEMref"
 rsem_strandedness: "unstranded"
 
diff --git a/tests/test_config_paired_star.yaml b/tests/test_config_paired_star.yaml
index ecbd4d9..3322012 100644
--- a/tests/test_config_paired_star.yaml
+++ b/tests/test_config_paired_star.yaml
@@ -50,6 +50,7 @@ featurecounts_strand: "2" #1 for first read transcription strand, 2 for second.
 featurecounts_rscript: "scripts/readcnt_to_rpkmtpm.py"
 featurecounts_annscript: "scripts/add_gene_to_ensg.py"
 run_rsem: "no"
+run_rustqc: false
 #rsem_ref_path: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/genomic_references/human/RSEM/txdir/RSEMref"
 rsem_strandedness: "unstranded"
 
diff --git a/tests/test_config_paired_star_rsem.yaml b/tests/test_config_paired_star_rsem.yaml
index ad2dc94..af8e4a1 100644
--- a/tests/test_config_paired_star_rsem.yaml
+++ b/tests/test_config_paired_star_rsem.yaml
@@ -50,6 +50,7 @@ featurecounts_strand: "2" #1 for first read transcription strand, 2 for second.
 featurecounts_rscript: "scripts/readcnt_to_rpkmtpm.py"
 featurecounts_annscript: "scripts/add_gene_to_ensg.py"
 run_rsem: "yes"
+run_rustqc: false
 #rsem_ref_path: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/genomic_references/human/RSEM/txdir/RSEMref"
 rsem_ref_path: "sample_ref/RSEM_ref/ref"
 rsem_strandedness: "unstranded"
diff --git a/tests/test_config_single_hisat.yaml b/tests/test_config_single_hisat.yaml
index 7f966a0..00a833b 100644
--- a/tests/test_config_single_hisat.yaml
+++ b/tests/test_config_single_hisat.yaml
@@ -61,3 +61,4 @@ picard_refflat: "sample_ref/hg38_chr567_refFlat_1M.txt"
 picard_strand: "FIRST_READ_TRANSCRIPTION_STRAND"
 reference_fa: "sample_ref/hg38_chr567_100k.fa"
 picard_build_script: "scripts/picard_ref_builder.sh"
+run_rustqc: false
diff --git a/tests/test_config_single_star.yaml b/tests/test_config_single_star.yaml
index 48e15d3..c614272 100644
--- a/tests/test_config_single_star.yaml
+++ b/tests/test_config_single_star.yaml
@@ -50,6 +50,7 @@ featurecounts_strand: "1" #1 for first read transcription strand, 2 for second.
 featurecounts_rscript: "scripts/readcnt_to_rpkmtpm.py"
 featurecounts_annscript: "scripts/add_gene_to_ensg.py"
 run_rsem: "no"
+run_rustqc: false
 #rsem_ref_path: "/dartfs-hpc/rc/lab/G/GMBSR_bioinfo/genomic_references/human/RSEM/txdir/RSEMref"
 rsem_strandedness: "unstranded"
 

From a31ff489a0c851e0852ea00d7aa3861f8a3aef5e Mon Sep 17 00:00:00 2001
From: Beatriz Bergamo <beatrizrbergamo@gmail.com>
Date: Tue, 21 Apr 2026 16:20:42 -0400
Subject: [PATCH 09/11] Apply RiboDetector only for paired-end samples

---
 Snakefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Snakefile b/Snakefile
index aab731e..b6deb23 100644
--- a/Snakefile
+++ b/Snakefile
@@ -15,9 +15,9 @@ sample_list = list(samples_df['sample_id'])
 REMOVE_rRNA = config.get("remove_rRNA", False)
 RUN_RUSTQC = config.get("run_rustqc", False)
 
-if REMOVE_rRNA:
+if REMOVE_rRNA and config["layout"]=="paired":
     R1_FASTQ_INPUT = "ribodetector/{sample}/{sample}.nonrrna.1.fq.gz" 
-    R2_FASTQ_INPUT = "ribodetector/{sample}/{sample}.nonrrna.2.fq.gz" if config["layout"]=="paired" else None
+    R2_FASTQ_INPUT = "ribodetector/{sample}/{sample}.nonrrna.2.fq.gz" 
 else:
     R1_FASTQ_INPUT = "trimming/{sample}.R1.trim.fastq.gz"
     R2_FASTQ_INPUT = "trimming/{sample}.R2.trim.fastq.gz" if config["layout"]=="paired" else None

From df89afd31c518dff810ac5d4f4dcbb0d95593e5c Mon Sep 17 00:00:00 2001
From: Beatriz Bergamo <beatrizrbergamo@gmail.com>
Date: Tue, 21 Apr 2026 16:27:43 -0400
Subject: [PATCH 10/11] Removing -e parameter from else ribodetector (for
 single end)

---
 Snakefile | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/Snakefile b/Snakefile
index b6deb23..19b0d61 100644
--- a/Snakefile
+++ b/Snakefile
@@ -15,9 +15,9 @@ sample_list = list(samples_df['sample_id'])
 REMOVE_rRNA = config.get("remove_rRNA", False)
 RUN_RUSTQC = config.get("run_rustqc", False)
 
-if REMOVE_rRNA and config["layout"]=="paired":
+if REMOVE_rRNA:
     R1_FASTQ_INPUT = "ribodetector/{sample}/{sample}.nonrrna.1.fq.gz" 
-    R2_FASTQ_INPUT = "ribodetector/{sample}/{sample}.nonrrna.2.fq.gz" 
+    R2_FASTQ_INPUT = "ribodetector/{sample}/{sample}.nonrrna.2.fq.gz" if config["layout"]=="paired" else None
 else:
     R1_FASTQ_INPUT = "trimming/{sample}.R1.trim.fastq.gz"
     R2_FASTQ_INPUT = "trimming/{sample}.R2.trim.fastq.gz" if config["layout"]=="paired" else None
@@ -177,7 +177,6 @@ rule ribodetector:
             ribodetector_cpu -t {resources.cpus} \
                 -l {params.read_length} \
                 -i {input.trimmed_read_1} \
-                -e norrna \
                 -o {output.filtered_read_1} \
                 -r {output.rrna_reads_1} \
                 > {log.stdout} 2> {log.stderr}

From 7af85df98953eef5a74885d07c1f88ab6ed56edf Mon Sep 17 00:00:00 2001
From: Mike Martinez <mike.j.martinez99@gmail.com>
Date: Tue, 21 Apr 2026 16:34:21 -0400
Subject: [PATCH 11/11] Remove script parameter from ribodetector rule

Removed script parameter from ribodetector rule.
---
 Snakefile | 1 -
 1 file changed, 1 deletion(-)

diff --git a/Snakefile b/Snakefile
index 19b0d61..f074109 100644
--- a/Snakefile
+++ b/Snakefile
@@ -192,7 +192,6 @@ rule ribodetector_mqc_summary:
     output:
         "ribodetector/rrna_norrna_pct_mqc.tsv"
     params:
-        script = "scripts/ribodetector_mqc.sh",
         ribo_dir = "ribodetector"
     resources: cpus="1", maxtime="10:00", mem_mb=2000
     message: "Generating ribodetector report."