Figure 1: Directed acyclic graph (DAG) of the current workflow steps.
-
-## Installation
-
-**Step 1: Clone this repository**
-
-``` bash
-git clone https://github.com/MPUSP/snakemake-bacterial-rnaseq-processing.git
-cd snakemake-bacterial-rnaseq-processing
-```
-
-**Step 2: Install dependencies**
-It is recommended to install snakemake and run the workflow with `conda` or `mamba`. [Miniforge](https://conda-forge.org/download/) is the preferred conda-forge installer and includes `conda`, `mamba` and their dependencies.
-
-**Step 3: Create snakemake environment**
-
-This step creates a new conda environment called `snakemake-bacterial-rnaseq-processing`.
-
-``` bash
-# create new environment with dependencies & activate it
-mamba create -c conda-forge -c bioconda -n snakemake-bacterial-rnaseq-processing snakemake pandas python=3.12
-conda activate snakemake-bacterial-rnaseq-processing
-```
-
-**Note:**
-
-All other dependencies for the workflow are **automatically pulled as `conda` environments** by snakemake, when running the workflow with the `--sdm-conda` parameter (recommended).
+Figure 1: Directed acyclic graph (DAG) of the current workflow steps.
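Both versions of the README state the custom-genome requirement that every chromosome/region named in the `*.gff` must also appear in the `*.fasta` (but not vice versa). This is easy to verify before a run; the sketch below uses only the standard library, and `fasta_ids`, `gff_ids` and `check_regions` are hypothetical helpers for illustration, not part of the workflow:

```python
def fasta_ids(lines):
    # Sequence IDs are the first whitespace-separated token after '>'
    return {line[1:].split()[0] for line in lines if line.startswith(">")}


def gff_ids(lines):
    # Region IDs sit in the first tab-separated column of non-comment rows
    return {line.split("\t")[0] for line in lines if line.strip() and not line.startswith("#")}


def check_regions(fasta_lines, gff_lines):
    # Regions annotated in the GFF but absent from the FASTA; must be empty
    return gff_ids(gff_lines) - fasta_ids(fasta_lines)


fasta = [">NC_002737.2 Streptococcus pyogenes", "ATGCATGC"]
gff = ["NC_002737.2\tRefSeq\tgene\t1\t8\t.\t+\t.\tID=gene-1"]
print(check_regions(fasta, gff))  # an empty set means the pair is consistent
```

The reverse check is deliberately omitted: FASTA sequences without annotated genes are explicitly allowed.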
-**Step 4: Create all rule specific environments (optional)**
+## Deployment options
 
-This step creates all conda environments specified in the snakemake rules. This step is optional.
+To run the workflow from command line, change the working directory.
 
-``` bash
-# activate new environment
-conda activate snakemake-bacterial-rnaseq-processing
-snakemake -c 1 --sdm conda --conda-create-envs-only --conda-cleanup-pkgs cache
+```bash
+cd path/to/snakemake-workflow-name
 ```
 
-## Running the workflow
-
-### Input data
-
-#### Reference genome
-
-An NCBI Refseq ID, e.g. `GCF_000006785.2`. Find your genome assembly and corresponding ID on [NCBI genomes](https://www.ncbi.nlm.nih.gov/data-hub/genome/). Alternatively use a custom pair of `*.fasta` file and `*.gff` file that describe the genome of choice.
-
-Important requirements when using custom `*.fasta` and `*.gff` files:
-
-- `*.gff` genome annotation must have the same chromosome/region name as the `*.fasta` file (example: `NC_002737.2`)
-- `*.gff` genome annotation must have `gene` and `CDS` type annotation that is automatically parsed to extract transcripts
-- all chromosomes/regions in the `*.gff` genome annotation must be present in the `*.fasta` sequence
-- but not all sequences in the `*.fasta` file need to have annotated genes in the `*.gff` file
-
-#### Read data
-
-RNA sequencing data in `*.fastq.gz` format. The currently supported input data are **second generation reads**. Input data files are supplied via a mandatory table, whose location is indicated in the `config.yml` file (default: `samples.tsv`). The sample sheet has the following layout:
-
-| sample | condition | replicate | experiment | data_folder | fq1 | fq2 | fq_umi |
-|---------|---------|---------|---------|---------|---------|---------|---------|
-| RNA-1 | RNA | 1 | rnaseq_mpusp_custom | data | RNA-1_R1.fastq.gz | RNA-1_R2.fastq.gz | \- |
-| RNA-2 | RNA | 2 | rnaseq_mpusp_custom | data | RNA-2_R2.fastq.gz | RNA-2_R2.fastq.gz | \- |
-
-Some configuration parameters of the pipeline may be specific for your data and library preparation protocol. The options should be adjusted in the `config.yml` file.
-
-Currently, we support example configurations for three different sequencing protocols, *i.e.* `rnaseq_nextflex`, `rnaseq_neb_umi` and `rnseq_mpusp_custom`. These example protocols can be found in `resources/protocols/`.
-
-### Execution
-
-To run the workflow from command line, change the working directory.
+Adjust options in the default config file `config/config.yml`.
+Before running the complete workflow, you can perform a dry run using:
 
-``` bash
-cd snakemake-bacterial-rnaseq-processing
+```bash
+snakemake --dry-run
 ```
 
-To run the complete workflow with test files using **`conda`**, execute the following command. The definition of the number of compute cores is mandatory.
+To run the workflow with test files using **conda**:
 
-``` bash
-snakemake --cores 10 --sdm conda --directory .test
+```bash
+snakemake --cores 2 --sdm conda --directory .test
 ```
 
-To run the workflow with your own data, define the sample sheet as explained above and adjust options in the default config file `config/config.yml` according to your library preparation protocol. Before running the entire workflow, you can perform a dry run using:
+To run the workflow with **apptainer**:
 
-``` bash
-snakemake -c 1 --sdm conda --dry-run
+```bash
+snakemake --cores 2 --sdm conda apptainer --directory .test
 ```
 
-## Author
+## Authors
 
-- Dr. Rina Ahmed-Begrich
-  - Affiliation: [Max-Planck-Unit for the Science of Pathogens](https://www.mpusp.mpg.de/) (MPUSP), Berlin, Germany
-  - ORCID profile: https://orcid.org/0000-0002-0656-1795
+- Dr. Rina Ahmed-Begrich
+  - Affiliation: [Max-Planck-Unit for the Science of Pathogens](https://www.mpusp.mpg.de/) (MPUSP), Berlin, Germany
+  - ORCID profile: https://orcid.org/0000-0002-0656-1795
+  - GitHub page: https://github.com/rabioinf
+- Dr. Michael Jahn
+  - Affiliation: [Max-Planck-Unit for the Science of Pathogens](https://www.mpusp.mpg.de/) (MPUSP), Berlin, Germany
+  - ORCID profile: https://orcid.org/0000-0002-3913-153X
+  - GitHub page: https://github.com/m-jahn
 
 Visit the MPUSP github page at https://github.com/MPUSP for more info on this workflow and other projects.
 
 ## References
 
-- Essential tools are linked in the top section of this document
\ No newline at end of file
+- Essential tools are linked in the top section of this document
diff --git a/config/README.md b/config/README.md
new file mode 100644
index 0000000..99ca523
--- /dev/null
+++ b/config/README.md
@@ -0,0 +1,65 @@
+## Workflow overview
+
+This workflow is a best-practice workflow for the processing of short read sequencing data in bacteria. The workflow is built using [snakemake](https://snakemake.readthedocs.io/en/stable/) and consists of the following steps:
+
+1. Obtain genome database in `fasta` and `gff` format (`python`, [NCBI Datasets](https://www.ncbi.nlm.nih.gov/datasets/docs/v2/))
+   1. Using automatic download from NCBI with a `RefSeq` ID
+   2. Using user-supplied files
+2. Check quality of input sequencing data ([FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/))
+3. Cut adapters and filter by length and/or sequencing quality score ([fastp](https://github.com/OpenGene/fastp))
+4. Identify unique molecular identifiers (UMI, [UMI-tools](https://umi-tools.readthedocs.io/en/latest/))
+5. Map reads to the reference genome ([STAR aligner](https://github.com/alexdobin/STAR))
+6. Sort and index aligned RNAseq data ([Samtools](http://www.htslib.org/))
+7. Deduplicate reads by unique molecular identifier (UMI, [UMI-tools](https://umi-tools.readthedocs.io/en/latest/))
+8. Generate CPM-normalized coverage files ([deepTools](https://deeptools.readthedocs.io/en/latest/))
+9. Quantify biotype features ([featureCounts](https://subread.sourceforge.net/featureCounts.html))
+10. Generate summary report for all processing steps ([MultiQC](https://seqera.io/multiqc/))
+
+## Running the workflow
+
+### Input
+
+#### Reference genome
+
+An NCBI RefSeq ID, e.g. `GCF_000006785.2`. Find your genome assembly and corresponding ID on [NCBI genomes](https://www.ncbi.nlm.nih.gov/data-hub/genome/). Alternatively use a custom pair of `*.fasta` file and `*.gff` file that describe the genome of choice.
+
+Important requirements when using custom `*.fasta` and `*.gff` files:
+
+- `*.gff` genome annotation must have the same chromosome/region name as the `*.fasta` file (example: `NC_002737.2`)
+- `*.gff` genome annotation must have `gene` and `CDS` type annotation that is automatically parsed to extract transcripts
+- all chromosomes/regions in the `*.gff` genome annotation must be present in the `*.fasta` sequence
+- but not all sequences in the `*.fasta` file need to have annotated genes in the `*.gff` file
+
+#### Read data
+
+RNA sequencing data in `*.fastq.gz` format. The currently supported input data are **second generation reads**. Input data files are supplied via a mandatory table, whose location is indicated in the `config.yml` file (default: `samples.tsv`). The sample sheet has the following layout:
+
+| sample | condition | replicate | read1             | read2             | readumi |
+| ------ | --------- | --------- | ----------------- | ----------------- | ------- |
+| RNA-1  | RNA       | 1         | RNA-1_R1.fastq.gz | RNA-1_R2.fastq.gz | \-      |
+| RNA-2  | RNA       | 2         | RNA-2_R1.fastq.gz | RNA-2_R2.fastq.gz | \-      |
+
+Some configuration parameters of the pipeline may be specific for your data and library preparation protocol. The options should be adjusted in the `config.yml` file.
+
+Configuration files for different sequencing protocols can be found in `resources/protocols/`.
+Currently, you may find protocols for, e.g., `rnaseq_nextflex`, `rnaseq_neb_umi` and a custom protocol `rnaseq_mpusp_custom`.
+
+To run the workflow with the respective test data for the different protocols, use the following commands:
+
+```bash
+snakemake --sdm conda --cores 12 --directory .test --configfile resources/protocols/rnaseq_mpusp_custom.yml
+snakemake --sdm conda --cores 12 --directory .test --configfile resources/protocols/rnaseq_neb_umi.yml
+snakemake --sdm conda --cores 12 --directory .test --configfile resources/protocols/rnaseq_nextflex.yml
+```
+
+### Output
+
+| Output File/Folder           | Description                                                                  |
+| ---------------------------- | ---------------------------------------------------------------------------- |
+| `results/genome/`            | Downloaded or user-supplied reference genome and annotation files.           |
+| `results/fastp/`             | Adapter-trimmed and quality-filtered FASTQ files.                            |
+| `results/mapped/`            | Aligned reads in BAM format, coverage in BigWig format.                      |
+| `results/deduplicated/`      | Aligned and UMI-deduplicated reads in BAM format, coverage in BigWig format. |
+| `results/qc/`                | Quality control reports for raw and processed reads (FastQC HTML files).     |
+| `results/quantify_biotypes/` | Gene/feature count tables (tab-delimited text files).                        |
+| `results/multiqc/`           | MultiQC report aggregating QC metrics from all steps.                        |
diff --git a/config/config.yml b/config/config.yml
index a9a3478..e9f1091 100644
--- a/config/config.yml
+++ b/config/config.yml
@@ -11,45 +11,34 @@ get_genome:
       "RefSeq": "gene",
       "RefSeq": "pseudogene",
       "RefSeq": "CDS",
-      "Protein Homology": "CDS"
+      "Protein Homology": "CDS",
     ]
 
 extract_features:
   biotypes: ["protein_coding", "pseudogene", "ncRNA", "rRNA", "tRNA"]
 
+umi_extraction:
+  method: "none"
+  pattern: ""
+
 umi_dedup: "--edit-distance-threshold=0"
 
-cutadapt:
-  read1_adapter: "AGATCGGAAGAGCACACGTCTGAACTCCAGTCA"
-  read2_adapter: "AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT"
-  default:
-    [
-      "-q 10 ",
-      "-m 18 ",
-      "--overlap=3"
-    ]
+fastp:
+  extra: "-M 10 -l 18 --adapter_sequence AGATCGGAAGAGCACACGTCTGAACTCCAGTCA --adapter_sequence_r2 AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT"
 
 star:
-  index: Null
-  genomeSAindexNbases: 9
-  multi: 10
-  sam_multi: 1
-  intron_max: 1
-  default:
+  index: "--genomeSAindexNbases 9"
+  extra:
     [
-      "--readFilesCommand zcat ",
-      "--outSAMstrandField None ",
-      "--outSAMattributes All ",
-      "--outSAMattrIHstart 0 ",
-      "--outFilterType Normal ",
-      "--outFilterMultimapScoreRange 1 ",
-      "-o STARmappings ",
-      "--outSAMtype BAM Unsorted ",
-      "--outStd BAM_Unsorted ",
-      "--outMultimapperOrder Random ",
-      "--alignEndsType EndToEnd",
+      "--outFilterMultimapNmax 10",
+      "--outSAMmultNmax 1",
+      "--outMultimapperOrder Random",
+      "--alignIntronMax 1",
     ]
 
+samtools:
+  sort: ""
+  index: ""
+
 feature_counts:
   defaults:
     [
@@ -58,18 +47,16 @@ feature_counts:
       "-g locus_tag",
       "-M",
      "--fracOverlap 0.2",
-      "--largestOverlap"
+      "--largestOverlap",
     ]
 
 deeptools:
-  bin_size: 1
-  normalize: "CPM"
-  defaults: "--exactScaling"
-  paired_end: "--extendReads"
+  genome_size: 2000000
+  extra: "--binSize 1 --normalizeUsing CPM --exactScaling --extendReads"
+
+fastqc:
+  extra: "--quiet --nogroup"
 
 multiqc:
-  config: "config/multiqc_config.yml"
-  defaults: ["--dirs-depth 2 ",
-    "--exclude general_stats ",
-    "--force ",
-    "--dirs"]
+  config: "config/multiqc_config.yml"
+  extra: "--dirs"
diff --git a/config/multiqc_config.yml b/config/multiqc_config.yml
index dcf1a68..8fcc93c 100644
--- a/config/multiqc_config.yml
+++ b/config/multiqc_config.yml
@@ -1,4 +1,5 @@
 report_header_info:
+  - Snakemake workflow: https://github.com/MPUSP/snakemake-bacterial-rnaseq-processing
  - Contact E-mail: "bioinformatics@mpusp.mpg.de"
  - Application Type: "Bacterial RNAseq"
  - Sequencing Platform: "Illumina"
@@ -15,7 +16,7 @@ custom_logo_url: "https://www.mpusp.mpg.de/"
 custom_logo_title: "Max Planck Unit for the Science of Pathogens"
 
 skip_versions_section: false
-disable_version_detection: true
+disable_version_detection: false
 versions_table_group_header: "Workflow Analysis Steps"
 
 skip_generalstats: true
@@ -24,21 +25,10 @@ remove_sections:
   - samtools-stats
 
 module_order:
+  - fastqc
+  - fastp
   - star
-  - cutadapt
-  - fastqc:
-      name: "FastQC (trimmed)"
-      anchor: "fastqc_trimmed"
-      info: "This section of the report shows FastQC results after adapter trimming."
-      target: ""
-      path_filters:
-        - "*/clipped_reads/*_fastqc.zip"
-  - fastqc:
-      name: "FastQC (raw)"
-      anchor: "fastqc_raw"
-      path_filters:
-        - "*/raw_reads/*_fastqc.zip"
-
-report_section_order:
-  fastqc_raw:
-    before: fastqc_trimmed
+  - umitools
+  - featurecounts
+  - samtools
+  - biotype_dist
diff --git a/config/samplesheet/samples.tsv b/config/samplesheet/samples.tsv
index 624f53f..de20d76 100644
--- a/config/samplesheet/samples.tsv
+++ b/config/samplesheet/samples.tsv
@@ -1,3 +1,3 @@
-sample	condition	replicate	experiment	data_folder	fq1	fq2	fq_umi
-RNA-1	RNA	1	rnaseq_neb_umi	.test/data/rnaseq_neb_umi	RNA-1_R1.fastq.gz	RNA-1_R2.fastq.gz	RNA-1_UMI.fastq.gz
-RNA-2	RNA	2	rnaseq_neb_umi	.test/data/rnaseq_neb_umi	RNA-2_R1.fastq.gz	RNA-2_R2.fastq.gz	RNA-2_UMI.fastq.gz
\ No newline at end of file
+sample	condition	replicate	read1	read2	readumi
+RNA-1	RNA	1	.test/data/rnaseq_neb_umi/RNA-1_R1.fastq.gz	.test/data/rnaseq_neb_umi/RNA-1_R2.fastq.gz	.test/data/rnaseq_neb_umi/RNA-1_UMI.fastq.gz
+RNA-2	RNA	2	.test/data/rnaseq_neb_umi/RNA-2_R1.fastq.gz	.test/data/rnaseq_neb_umi/RNA-2_R2.fastq.gz	.test/data/rnaseq_neb_umi/RNA-2_UMI.fastq.gz
diff --git a/config/schemas/config.schema.yml b/config/schemas/config.schema.yml
new file mode 100644
index 0000000..d8feded
--- /dev/null
+++ b/config/schemas/config.schema.yml
@@ -0,0 +1,88 @@
+$schema: "https://json-schema.org/draft/2020-12/schema"
+description: config file structure
+properties:
+  samplesheet:
+    type: string
+  libtype:
+    type: string
+  get_genome:
+    properties:
+      database:
+        type: [string, "null"]
+      assembly:
+        type: [string, "null"]
+      fasta:
+        type: [string, "null"]
+      gff:
+        type: [string, "null"]
+      gff_source_type:
+        type: array
+  extract_features:
+    properties:
+      biotypes:
+        type: array
+        items:
+          type: string
+  umi_extraction:
+    properties:
+      method:
+        type: string
+        enum: ["regex", "string", "none"]
+      pattern:
+        type: string
+  umi_dedup:
+    type: string
+  fastp:
+    properties:
+      extra:
+        type: string
+  star:
+    properties:
+      index:
+        type: [string, "null"]
+      extra:
+        type: array
+        items:
+          type: string
+  samtools:
+    properties:
+      sort:
+        type: string
+      index:
+        type: string
+  feature_counts:
+    properties:
+      defaults:
+        type: array
+        items:
+          type: string
+  deeptools:
+    properties:
+      genome_size:
+        type: number
+      extra:
+        type: string
+  fastqc:
+    properties:
+      extra:
+        type: string
+  multiqc:
+    properties:
+      config:
+        type: string
+      extra:
+        type: string
+required:
+  - samplesheet
+  - libtype
+  - get_genome
+  - extract_features
+  - umi_extraction
+  - umi_dedup
+  - fastp
+  - star
+  - samtools
+  - feature_counts
+  - deeptools
+  - fastqc
+  - multiqc
diff --git a/config/schemas/samples.schema.yml b/config/schemas/samples.schema.yml
new file mode 100644
index 0000000..c642bbd
--- /dev/null
+++ b/config/schemas/samples.schema.yml
@@ -0,0 +1,28 @@
+$schema: "https://json-schema.org/draft/2020-12/schema"
+description: an entry in the sample sheet
+properties:
+  sample:
+    type: string
+    description: sample name/identifier
+  condition:
+    type: string
+    description: sample condition that will be compared during differential analysis
+  replicate:
+    type: number
+    default: 1
+    description: consecutive numbers representing multiple replicates of one condition
+  read1:
+    type: string
+    description: names of fastq files, read 1
+  read2:
+    type: string
+    description: names of fastq files, read 2 (optional)
+  readumi:
+    type: string
+    description: names of fastq files, providing UMI (optional)
+
+required:
+  - sample
+  - condition
+  - replicate
+  - read1
diff --git a/resources/conda_envs/conda_envs.log b/resources/conda_envs/conda_envs.log
index ec4f6fd..1e8af23 100644
--- a/resources/conda_envs/conda_envs.log
+++ b/resources/conda_envs/conda_envs.log
@@ -1,21 +1,12 @@
-name: cutadapt
-channels:
-  - conda-forge
-  - bioconda
-dependencies:
-  - cutadapt=4.9
-name: fastqc
-channels:
-  - conda-forge
-  - bioconda
-dependencies:
-  - fastqc=0.12.1
-name: multiqc
+name: umitools
 channels:
   - conda-forge
   - bioconda
 dependencies:
-  - multiqc=1.24.1
+  - umi_tools=1.1.6
+  - samtools=1.23
+  - htslib=1.23
+  - dnaio=1.2.2
 name: extract_features
 channels:
   - conda-forge
@@ -27,60 +18,17 @@ channels:
   - conda-forge
   - bioconda
 dependencies:
-  - python=3.12.7
-  - pandas=2.2.3
-  - pyyaml=6.0.2
-  - snakemake=8.25.5
-name: deeptools
-channels:
-  - conda-forge
-  - bioconda
-dependencies:
-  - deeptools=3.5.5
+  - pyyaml=6.0.3
 name: feature_counts
 channels:
   - conda-forge
   - bioconda
 dependencies:
   - subread=2.0.6
-name: get_genome
-channels:
-  - conda-forge
-  - bioconda
-dependencies:
-  - unzip=6.0
-  - ncbi-datasets-cli=16.27.2
-  - bcbio-gff=0.7.1
-  - samtools=1.21
-  - htslib=1.21
-name: samtools
-channels:
-  - conda-forge
-  - bioconda
-dependencies:
-  - samtools=1.21
-  - htslib=1.21
-name: star
-channels:
-  - conda-forge
-  - bioconda
-dependencies:
-  - star=2.7.11b
-  - samtools=1.21
-  - htslib=1.21
-name: umitools
-channels:
-  - conda-forge
-  - bioconda
-dependencies:
-  - umi_tools=1.1.5
-  - samtools=1.21
-  - htslib=1.21
-  - dnaio=1.2.2
 name: quantify_biotypes
 channels:
   - conda-forge
   - bioconda
 dependencies:
-  - python=3.12.7
-  - pandas=2.2.3
+  - python=3.13.12
+  - pandas=2.3.2
diff --git a/resources/images/dag.png b/resources/images/dag.png
index 5ab8b23..e25bc2b 100644
Binary files a/resources/images/dag.png and b/resources/images/dag.png differ
diff --git a/resources/protocols/rnaseq_mpusp_custom.yml b/resources/protocols/rnaseq_mpusp_custom.yml
index bce0678..365d0e2 100644
--- a/resources/protocols/rnaseq_mpusp_custom.yml
+++ b/resources/protocols/rnaseq_mpusp_custom.yml
@@ -11,7 +11,7 @@ get_genome:
       "RefSeq": "gene",
       "RefSeq": "pseudogene",
       "RefSeq": "CDS",
-      "Protein Homology": "CDS"
+      "Protein Homology": "CDS",
     ]
 
 extract_features:
@@ -22,37 +22,24 @@ umi_extraction:
   pattern: "--bc-pattern=NNNCCCNNNCCCNNNC"
 
 umi_dedup: "--edit-distance-threshold=0"
 
-cutadapt:
-  read1_adapter: "TACACGACGCTCTTCCGATCT"
-  read2_adapter: "AGATCGGAAGAGCGTCGTGTA"
-  default:
-    [
-      "-q 10 ",
-      "-m 18 ",
-      "--overlap=3"
-    ]
+fastp:
+  extra: "-M 10 -l 18 --adapter_sequence TACACGACGCTCTTCCGATCT --adapter_sequence_r2 AGATCGGAAGAGCGTCGTGTA"
 
 star:
-  index: Null
-  genomeSAindexNbases: 9
-  multi: 10
-  sam_multi: 1
-  intron_max: 1
-  default:
+  index: "--genomeSAindexNbases 9"
+  extra:
     [
-      "--readFilesCommand zcat ",
-      "--outSAMstrandField None ",
-      "--outSAMattributes All ",
-      "--outSAMattrIHstart 0 ",
-      "--outFilterType Normal ",
-      "--outFilterMultimapScoreRange 1 ",
-      "-o STARmappings ",
-      "--outSAMtype BAM Unsorted ",
-      "--outStd BAM_Unsorted ",
-      "--outMultimapperOrder Random ",
+      "--outFilterMultimapNmax 10",
+      "--outSAMmultNmax 1",
+      "--outMultimapperOrder Random",
+      "--alignIntronMax 1",
       "--alignEndsType EndToEnd",
     ]
 
+samtools:
+  sort: ""
+  index: ""
+
 feature_counts:
   defaults:
     [
@@ -61,18 +48,16 @@ feature_counts:
       "-g locus_tag",
      "-M",
      "--fracOverlap 0.2",
-      "--largestOverlap"
+      "--largestOverlap",
    ]
 
 deeptools:
-  bin_size: 1
-  normalize: "CPM"
-  defaults: "--exactScaling"
-  paired_end: "--extendReads"
+  genome_size: 2000000
+  extra: "--binSize 1 --normalizeUsing CPM --exactScaling --extendReads"
+
+fastqc:
+  extra: "--quiet --nogroup"
 
 multiqc:
-  config: "config/multiqc_config.yml"
-  defaults: ["--dirs-depth 2 ",
-    "--exclude general_stats ",
-    "--force ",
-    "--dirs"]
+  config: "config/multiqc_config.yml"
+  extra: "--dirs"
diff --git a/resources/protocols/rnaseq_neb_umi.yml b/resources/protocols/rnaseq_neb_umi.yml
index 636b7a5..4e7823a 100644
--- a/resources/protocols/rnaseq_neb_umi.yml
+++ b/resources/protocols/rnaseq_neb_umi.yml
@@ -11,45 +11,34 @@ get_genome:
       "RefSeq": "gene",
       "RefSeq": "pseudogene",
       "RefSeq": "CDS",
-      "Protein Homology": "CDS"
+      "Protein Homology": "CDS",
     ]
 
 extract_features:
   biotypes: ["protein_coding", "pseudogene", "ncRNA", "rRNA", "tRNA"]
 
+umi_extraction:
+  method: "none"
+  pattern: ""
+
 umi_dedup: "--edit-distance-threshold=0"
 
-cutadapt:
-  read1_adapter: "AGATCGGAAGAGCACACGTCTGAACTCCAGTCA"
-  read2_adapter: "AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT"
-  default:
-    [
-      "-q 10 ",
-      "-m 18 ",
-      "--overlap=3"
-    ]
+fastp:
+  extra: "-M 10 -l 18 --adapter_sequence AGATCGGAAGAGCACACGTCTGAACTCCAGTCA --adapter_sequence_r2 AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT"
 
 star:
-  index: Null
-  genomeSAindexNbases: 9
-  multi: 10
-  sam_multi: 1
-  intron_max: 1
-  default:
+  index: "--genomeSAindexNbases 9"
+  extra:
     [
-      "--readFilesCommand zcat ",
-      "--outSAMstrandField None ",
-      "--outSAMattributes All ",
-      "--outSAMattrIHstart 0 ",
-      "--outFilterType Normal ",
-      "--outFilterMultimapScoreRange 1 ",
-      "-o STARmappings ",
-      "--outSAMtype BAM Unsorted ",
-      "--outStd BAM_Unsorted ",
-      "--outMultimapperOrder Random ",
-      "--alignEndsType EndToEnd",
+      "--outFilterMultimapNmax 10",
+      "--outSAMmultNmax 1",
+      "--outMultimapperOrder Random",
+      "--alignIntronMax 1",
     ]
 
+samtools:
+  sort: ""
+  index: ""
+
 feature_counts:
   defaults:
     [
@@ -58,18 +47,16 @@ feature_counts:
       "-g locus_tag",
      "-M",
      "--fracOverlap 0.2",
-      "--largestOverlap"
+      "--largestOverlap",
    ]
 
 deeptools:
-  bin_size: 1
-  normalize: "CPM"
-  defaults: "--exactScaling"
-  paired_end: "--extendReads"
+  genome_size: 2000000
+  extra: "--binSize 1 --normalizeUsing CPM --exactScaling --extendReads"
+
+fastqc:
+  extra: "--quiet --nogroup"
 
 multiqc:
-  config: "config/multiqc_config.yml"
-  defaults: ["--dirs-depth 2 ",
-    "--exclude general_stats ",
-    "--force ",
-    "--dirs"]
+  config: "config/multiqc_config.yml"
+  extra: "--dirs"
diff --git a/resources/protocols/rnaseq_nextflex.yml b/resources/protocols/rnaseq_nextflex.yml
index 9806fc2..01aeb82 100644
--- a/resources/protocols/rnaseq_nextflex.yml
+++ b/resources/protocols/rnaseq_nextflex.yml
@@ -11,7 +11,7 @@ get_genome:
       "RefSeq": "gene",
       "RefSeq": "pseudogene",
      "RefSeq": "CDS",
-      "Protein Homology": "CDS"
+      "Protein Homology": "CDS",
    ]
 
 extract_features:
@@ -19,51 +19,27 @@ extract_features:
 
 umi_extraction:
   method: "string"
-  pattern:
-    [
-      "--bc-pattern=NNNN ",
-      "--bc-pattern2=NNNN"
-    ]
+  pattern: "--bc-pattern=NNNN --bc-pattern2=NNNN"
 
 umi_dedup: "--edit-distance-threshold=0"
 
-trunc_fastq:
-  default:
-    [
-      "-u='-4' ",
-      "-U='-4'"
-    ]
-
-cutadapt:
-  read1_adapter: "TGGAATTCTCGGGTGCCAAGGAACTCCAGTCAC"
-  read2_adapter: "GATCGTCGGACTGTAGAACTCTGAACGTGTAGATC"
-  default:
-    [
-      "-q 10 ",
-      "-m 22 ",
-      "--overlap=3"
-    ]
+fastp:
+  extra: "-M 10 -l 18 --trim_tail1=4 --trim_tail2=4"
 
 star:
-  index: Null
-  genomeSAindexNbases: 9
-  multi: 10
-  sam_multi: 1
-  intron_max: 1
-  default:
+  index: "--genomeSAindexNbases 9"
+  extra:
     [
-      "--readFilesCommand zcat ",
-      "--outSAMstrandField None ",
-      "--outSAMattributes All ",
-      "--outSAMattrIHstart 0 ",
-      "--outFilterType Normal ",
-      "--outFilterMultimapScoreRange 1 ",
-      "-o STARmappings ",
-      "--outSAMtype BAM Unsorted ",
-      "--outStd BAM_Unsorted ",
-      "--outMultimapperOrder Random ",
+      "--outFilterMultimapNmax 10",
+      "--outSAMmultNmax 1",
+      "--outMultimapperOrder Random",
+      "--alignIntronMax 1",
       "--alignEndsType EndToEnd",
     ]
 
+samtools:
+  sort: ""
+  index: ""
+
 feature_counts:
   defaults:
     [
@@ -72,18 +48,16 @@ feature_counts:
       "-g locus_tag",
      "-M",
      "--fracOverlap 0.2",
-      "--largestOverlap"
+      "--largestOverlap",
    ]
 
 deeptools:
-  bin_size: 1
-  normalize: "CPM"
-  defaults: "--exactScaling"
-  paired_end: "--extendReads"
+  genome_size: 2000000
+  extra: "--binSize 1 --normalizeUsing CPM --exactScaling --extendReads"
+
+fastqc:
+  extra: "--quiet --nogroup"
 
 multiqc:
-  config: "config/multiqc_config.yml"
-  defaults: ["--dirs-depth 2 ",
-    "--exclude general_stats ",
-    "--force ",
-    "--dirs"]
+  config: "config/multiqc_config.yml"
+  extra: "--dirs"
diff --git a/workflow/Snakefile b/workflow/Snakefile
index 05b99de..af7ac13 100644
--- a/workflow/Snakefile
+++ b/workflow/Snakefile
@@ -1,31 +1,12 @@
-# ----------------------------------------------------- #
-# A Snakemake workflow for the preprocessing of short   #
-# read RNAseq data in bacteria.                         #
-#                                                       #
-# Authors: Rina Ahmed-Begrich                           #
-# Date: 2025-01-02                                      #
-# License: MIT                                          #
-# ----------------------------------------------------- #
 import os
 
-
-__author__ = "Rina Ahmed-Begrich"
-__year__ = "2025"
-
-bold = "\033[1m"
-green = "\033[92m"
-cyan = "\033[36m"
-end = "\033[0m"
-
-msg = f"""{cyan}Bacterial-rnaseq-preprocessing: A Snakemake workflow
-for the preprocessing of short read RNAseq data in bacteria.{end}
-
-"""
-
-epilog = f"""{cyan}Written by {__author__},
-Max Planck Unit for the Science of Pathogens. Copyright (c) {__year__}.
-Copyright Holder All Rights Reserved.{end}
-
+epilog = """
+\033[36m
+Bacterial-rnaseq-processing: A Snakemake workflow
+for the processing of short read RNAseq data in bacteria.
+---
+Max Planck Unit for the Science of Pathogens. Copyright (c) 2026.
+\033[0m
+"""
 
 
@@ -37,45 +18,27 @@ configfile: "config/config.yml"
 
 # load rules
 # -----------------------------------------------------
 include: "rules/common.smk"
-include: "rules/qc.smk"
-include: "rules/trim.smk"
+include: "rules/coverage.smk"
 include: "rules/dedup.smk"
 include: "rules/mapping.smk"
+include: "rules/qc.smk"
 include: "rules/quantification.smk"
-include: "rules/coverage_tracks.smk"
+include: "rules/trim.smk"
 
 
 onstart:
-    print("\n--- Analysis started...\n")
-    print()
-    print("--- Analysis parameters --------------------------------------------\n")
-    print(f"Current working directory: {os.getcwd()}")
-    print(f"Output directory: {os.path.join(os.getcwd(), 'results')}")
-    print()
-    print(f"RNAseq samples: {list(samples.index)}")
-    print()
+    print("\n--- Analysis started ---\n")
+    print(f"RNAseq samples: {list(samples.index)}\n\n")
 
 
 onsuccess:
-    print()
-    print(msg)
+    print("\n--- Workflow finished successfully! ---\n")
     print(epilog)
-    print("--- Workflow finished, no error! -----------------------------------")
-    print()
-    debug = os.path.join(os.getcwd(), "results/workflow.log")
-    shell(
-        "cat {log} > {debug} && echo -e '\nWorkflow finished, no error!\n' >> {debug}"
-    )
 
 
 onerror:
-    print()
-    print(msg)
+    print("\n--- An error occurred, see the snakemake log ---\n")
     print(epilog)
-    print("--- An error occurred! ---------------------------------------------")
-    print()
-    error = os.path.join(os.getcwd(), "results/error.log")
-    shell("cat {log} > {error} && echo -e '\nAn error occurred!' >> {error}")
 
 
 # target rules
diff --git a/workflow/envs/base.yml b/workflow/envs/base.yml
index 3cfe602..8cd8edb 100644
--- a/workflow/envs/base.yml
+++ b/workflow/envs/base.yml
@@ -3,7 +3,4 @@ channels:
   - conda-forge
   - bioconda
 dependencies:
-  - python=3.12.7
-  - pandas=2.2.3
-  - pyyaml=6.0.2
-  - snakemake=8.25.5
+  - pyyaml=6.0.3
diff --git a/workflow/envs/cutadapt.yml b/workflow/envs/cutadapt.yml
deleted file mode 100644
index e6bbff7..0000000
--- a/workflow/envs/cutadapt.yml
+++ /dev/null
@@ -1,6 +0,0 @@
-name: cutadapt
-channels:
-  - conda-forge
-  - bioconda
-dependencies:
-  - cutadapt=4.9
diff --git a/workflow/envs/deeptools.yml b/workflow/envs/deeptools.yml
deleted file mode 100644
index e9de356..0000000
--- a/workflow/envs/deeptools.yml
+++ /dev/null
@@ -1,6 +0,0 @@
-name: deeptools
-channels:
-  - conda-forge
-  - bioconda
-dependencies:
-  - deeptools=3.5.5
diff --git a/workflow/envs/fastqc.yml b/workflow/envs/fastqc.yml
deleted file mode 100644
index 25e6d86..0000000
--- a/workflow/envs/fastqc.yml
+++ /dev/null
@@ -1,6 +0,0 @@
-name: fastqc
-channels:
-  - conda-forge
-  - bioconda
-dependencies:
-  - fastqc=0.12.1
diff --git a/workflow/envs/get_genome.yml b/workflow/envs/get_genome.yml
deleted file mode 100644
index 26ce7cc..0000000
--- a/workflow/envs/get_genome.yml
+++ /dev/null
@@ -1,10 +0,0 @@
-name: get_genome
-channels:
-  - conda-forge
-  - bioconda
-dependencies:
-  - unzip=6.0
-  - ncbi-datasets-cli=16.27.2
-  - bcbio-gff=0.7.1
-  - samtools=1.21
-  - htslib=1.21
diff --git a/workflow/envs/multiqc.yml b/workflow/envs/multiqc.yml
deleted file mode 100644
index 502ba91..0000000
--- a/workflow/envs/multiqc.yml
+++ /dev/null
@@ -1,6 +0,0 @@
-name: multiqc
-channels:
-  - conda-forge
-  - bioconda
-dependencies:
-  - multiqc=1.24.1
diff --git a/workflow/envs/quantify_biotypes.yml b/workflow/envs/quantify_biotypes.yml
index 85afc13..7147559 100644
--- a/workflow/envs/quantify_biotypes.yml
+++ b/workflow/envs/quantify_biotypes.yml
@@ -3,5 +3,5 @@ channels:
   - conda-forge
   - bioconda
 dependencies:
-  - python=3.12.7
-  - pandas=2.2.3
+  - python=3.13.12
+  - pandas=2.3.2
diff --git a/workflow/envs/samtools.yml b/workflow/envs/samtools.yml
deleted file mode 100644
index d6a5cca..0000000
--- a/workflow/envs/samtools.yml
+++ /dev/null
@@ -1,7 +0,0 @@
-name: samtools
-channels:
-  - conda-forge
-  - bioconda
-dependencies:
-  - samtools=1.21
-  - htslib=1.21
diff --git a/workflow/envs/star.yml b/workflow/envs/star.yml
deleted file mode 100644
index f1ae198..0000000
--- a/workflow/envs/star.yml
+++ /dev/null
@@ -1,8 +0,0 @@
-name: star
-channels:
-  - conda-forge
-  - bioconda
-dependencies:
-  - star=2.7.11b
-  - samtools=1.21
-  - htslib=1.21
diff --git a/workflow/envs/umitools.yml b/workflow/envs/umitools.yml
index c2d8353..e0d6639 100644
--- a/workflow/envs/umitools.yml
+++ b/workflow/envs/umitools.yml
@@ -3,7 +3,7 @@ channels:
   - conda-forge
   - bioconda
 dependencies:
-  - umi_tools=1.1.5
-  - samtools=1.21
-  - htslib=1.21
+  - umi_tools=1.1.6
+  - samtools=1.23
+  - htslib=1.23
   - dnaio=1.2.2
diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk
index fb79727..4d35991 100644
--- a/workflow/rules/common.smk
+++ b/workflow/rules/common.smk
@@ -1,16 +1,17 @@
-import itertools
-import os
 import pandas as pd
-import yaml
 from snakemake.logging import logger
+from snakemake.utils import validate
+from pathlib import Path
 
 
 # read sample sheet
 # -----------------------------------------------------
-na_values = ["", "NaN", "nan", "null", "-"]
 samples = (
     pd.read_csv(
-        config["samplesheet"], sep="\t", dtype={"sample": str}, na_values=na_values
+        config["samplesheet"],
+        sep="\t",
+        dtype={"sample": str},
+        na_values=["", "NaN", "nan", "null", "-"],
     )
     .set_index("sample", drop=False)
     .sort_index()
@@ -19,216 +20,49 @@ samples = (
 )
 
 
 wildcard_constraints:
     sample="|".join(samples.index),
-    read="|".join(["R1", "R2"]),
+    read="|".join(["read1", "read2", "readumi"]),
 
 
-# helpers
-# -----------------------------------------------------
-experiment_types = list(samples["experiment"].unique())
-
-is_single_end_experiment = False
-is_paired_end_experiment = False
-is_rnaseq_neb_umi = False
-is_rnaseq_nextflex = False
-is_rnaseq_neb_umi = False
-is_rnaseq_mpusp_custom = False
-
-# set flags based on experiment types
-if "rnaseq_neb_umi" in experiment_types:
-    is_rnaseq_neb_umi = True
-if "rnaseq_nextflex" in experiment_types:
-    is_rnaseq_nextflex = True
-if "rnaseq_mpusp_custom" in experiment_types:
-    is_rnaseq_mpusp_custom = True
-
-# if any entry in fq2 is NaN -> single-end experiment
-if samples["fq2"].isna().any():
-    is_single_end_experiment = True
-
-# if any entry in fq2 is not NaN -> paired-end experiment
-if samples["fq2"].notna().any():
-    is_paired_end_experiment = True
-
-
-def validate_experiment_type(a, b, c=False):
-    return sum([a, b, c]) == 1
-
-
-# each rnaseq experiment type can only be used exclusively
-if not validate_experiment_type(
-    is_rnaseq_neb_umi, is_rnaseq_nextflex, is_rnaseq_mpusp_custom
-):
-    msg = "A single experiment type needs to be selected. Possible types are: [rnaseq_neb_umi, rnaseq_nextflex, rnaseq_mpusp_custom]."
-    logger.error(msg)
-    raise ValueError(msg)
-
-# either single-end or paired-end sequencing allowed
-if not validate_experiment_type(a=is_single_end_experiment, b=is_paired_end_experiment):
-    msg = "All samples need to be sequenced either in single-end or paired-end mode in order to analyse them at once."
- logger.error(msg) - raise ValueError(msg) +# validate sample sheet and config file +validate(samples, schema="../../config/schemas/samples.schema.yml") +validate(config, schema="../../config/schemas/config.schema.yml") -# set final output files +# helpers # ----------------------------------------------------- -def get_final_output(): - targets = [] - targets.append("results/report/multiqc_report.html") - targets.append( - expand( - "results/{step}/wig_normalized/{sample}_cpm_{strand}.bw", - step=["mapped", "deduplicated"], - sample=samples.index, - strand=["plus", "minus"], - ) +# determine input type, all entries must be either single or paired end +def is_paired_end(): + if samples["read2"].isna().all(): + return False + elif samples["read2"].notna().all(): + return True + else: + msg = ( + "Some samples seem to have a read2 fastq file, while others have only a " + + "read1 fastq file. \nYou may not mix single-end and paired-end samples." + ) + logger.error(msg) + raise ValueError(msg) + + +# get fastq files +def get_fastq(wildcards): + file = Path(samples.loc[wildcards["sample"]][wildcards["read"]]) + if file.is_absolute(): + return file + else: + input_dir = Path.absolute(Path.cwd()) + return input_dir / file + + +# get pairs of fastq files for fastp +def get_fastq_pairs(wildcards): + return expand( + "results/umi_extract{separate}/{sample}_{read}.fastq.gz", + separate="_separate" if samples["readumi"].notna().all() else "", + sample=wildcards.sample, + read=["read1", "read2"] if is_paired_end() else ["read1"], ) - return targets - - -# returns True if single-end -def is_single_end(sample): - """Determine whether the sample is single-end.""" - fq2_not_present = pd.isnull(samples.loc[sample, "fq2"]) - return fq2_not_present - - -# get fastq files for qc input -def get_qc_input(wildcards): - """Get QC input file.""" - inputs = [] - if wildcards.status == "raw": - inputs.append( - expand( - "{input_dir}/{sample}", - 
input_dir=samples.loc[wildcards.sample]["data_folder"], - sample=samples.loc[wildcards.sample]["fq1"], - ) - ) - if not is_single_end(wildcards.sample): - # append R2 if sample is paired-end - inputs.append( - expand( - "{input_dir}/{sample}", - input_dir=samples.loc[wildcards.sample]["data_folder"], - sample=samples.loc[wildcards.sample]["fq2"], - ) - ) - - if wildcards.status == "clipped": - if not is_single_end(wildcards.sample): - inputs.append( - expand( - os.path.join("results", "clipped", "{sample}_{read}.fastq.gz"), - sample=wildcards.sample, - read=["R1", "R2"], - ) - ) - else: - inputs.append( - expand( - os.path.join("results", "clipped", "{sample}.fastq.gz"), - sample=wildcards.sample, - ) - ) - return list(itertools.chain.from_iterable(inputs)) - - -# get fastq files for umi extract input -def get_umi_input(wildcards): - """Get FASTQ files UMI extraction.""" - inputs = [] - # three fastq files - inputs.append( - expand( - "{input_dir}/{sample}", - input_dir=samples.loc[wildcards.sample]["data_folder"], - sample=samples.loc[wildcards.sample]["fq1"], - ) - ) - if not is_single_end(wildcards.sample): - # append R2 if sample is paired-end - inputs.append( - expand( - "{input_dir}/{sample}", - input_dir=samples.loc[wildcards.sample]["data_folder"], - sample=samples.loc[wildcards.sample]["fq2"], - ) - ) - # include third fastq file - if is_rnaseq_neb_umi: - inputs.append( - expand( - "{input_dir}/{sample}", - input_dir=samples.loc[wildcards.sample]["data_folder"], - sample=samples.loc[wildcards.sample]["fq_umi"], - ) - ) - return list(itertools.chain.from_iterable(inputs)) - - -# returns fastq files for trimming -def get_trimming_input(wildcards): - """Get FASTQ files for trimming.""" - inputs = [] - if is_single_end_experiment: - inputs.append( - expand("results/umi_extract/{sample}.fastq.gz", sample=wildcards.sample) - ) - elif is_paired_end_experiment: - inputs.append( - expand( - "results/umi_extract/{sample}_{read}.fastq.gz", - sample=wildcards.sample, 
- read=["R1", "R2"], - ) - ) - return list(itertools.chain.from_iterable(inputs)) - - -# returns fastq files for truncation post trimming -def get_trunc_input(wildcards): - """Get FASTQ files for trunction post trimming.""" - inputs = [] - if is_single_end_experiment: - inputs.append(expand("results/clipped/{sample}.fastq.gz", sample=wildcards.sample)) - elif is_paired_end_experiment: - inputs.append( - expand( - "results/clipped/{sample}_{read}.fastq.gz", - sample=wildcards.sample, - read=["R1", "R2"], - ) - ) - return list(itertools.chain.from_iterable(inputs)) - - -# returns fastq files for mapping -def get_mapping_input(wildcards): - """Get FASTQ files for trimming.""" - inputs = [] - if is_single_end_experiment and (is_rnaseq_mpusp_custom or is_rnaseq_neb_umi): - inputs.append(expand("results/clipped/{sample}.fastq.gz", sample=wildcards.sample)) - elif is_single_end_experiment and is_rnaseq_nextflex: - inputs.append( - expand("results/trunc_fastq/{sample}.fastq.gz", sample=wildcards.sample) - ) - elif is_paired_end_experiment and (is_rnaseq_mpusp_custom or is_rnaseq_neb_umi): - inputs.append( - expand( - "results/clipped/{sample}_{read}.fastq.gz", - sample=wildcards.sample, - read=["R1", "R2"], - ) - ) - elif is_paired_end_experiment and is_rnaseq_nextflex: - inputs.append( - expand( - "results/trunc_fastq/{sample}_{read}.fastq.gz", - sample=wildcards.sample, - read=["R1", "R2"], - ) - ) - return list(itertools.chain.from_iterable(inputs)) # return bam files for alignment qc @@ -245,68 +79,60 @@ def get_stats_input(wildcards): ) -# return bam files to generate coverage tracks -def get_bigwig_input(wildcards): - if wildcards.step == "mapped": - return expand( - "results/mapped/{sample}.bam", - sample=wildcards.sample, - ) - if wildcards.step == "deduplicated": - return expand( - "results/deduplicated/{sample}.bam", - sample=wildcards.sample, - ) - - # returns path to conda envs files def get_conda_envs_files(): - wf_dir = os.path.abspath(workflow.basedir) - 
envs = [] + wf_dir = Path(workflow.basedir).absolute() / "envs" try: - envs.append( - expand( - os.path.join(wf_dir, "envs", "{envs}"), envs=os.listdir(f"{wf_dir}/envs") - ) - ) + envs = [str(i) for i in wf_dir.iterdir()] except FileNotFoundError: - envs.append([""]) - return list(itertools.chain.from_iterable(envs)) + msg = "No conda environments found in the 'envs' directory." + logger.error(msg) + return envs -def construct_multiqc_input(): +def get_multiqc_input(wildcards): inputs = [] - inputs.append( - expand( - "results/qc/{status}_reads/{sample}", - status=["raw", "clipped"], - sample=samples.index, - ), + inputs += expand( + "results/fastp/{sample}.json", + sample=samples.index, ) - inputs.append( - expand( - "results/qc/{step}_alignment/{sample}.flagstat", - step=["mapped", "dedup"], - sample=samples.index, - ) + inputs += expand( + "results/fastqc/{sample}_{read}_fastqc.{ext}", + sample=samples.index, + read=["read1", "read2"] if is_paired_end() else ["read1"], + ext=["html", "zip"], + ) + inputs += expand( + "results/mapped/unsorted/{sample}/mapped.bam", + sample=samples.index, + ) + inputs += expand( + "results/deduplicated/log/{sample}_umi_stats.txt", + sample=samples.index, + ) + inputs += expand( + "results/qc/{step}/{sample}.flagstat", + step=["mapped", "deduplicated"], + sample=samples.index, + ) + inputs += expand( + "results/qc/biotypes/{sample}.counts.summary", + sample=samples.index, ) - inputs.append( + inputs += ["results/qc/biotypes/barplot_biotype_data_mqc.json"] + return inputs + + +# get final output files +def get_final_output(): + targets = [] + targets.append("results/multiqc/multiqc_report.html") + targets.append( expand( - "results/qc/biotypes/{sample}.counts.summary", + "results/{step}/{sample}_cpm_{strand}.bw", + step=["mapped", "deduplicated"], sample=samples.index, + strand=["plus", "minus"], ) ) - inputs.append(["results/qc/biotypes/barplot_biotype_data_mqc.json"]) - return list(itertools.chain.from_iterable(inputs)) - - -def 
define_multiqc_dirs():
-    dirs = []
-    prefix = "results"
-    dirs.append(os.path.join(prefix, "qc/raw_reads"))
-    dirs.append(os.path.join(prefix, "clipped"))
-    dirs.append(os.path.join(prefix, "qc/clipped_reads"))
-    dirs.append(os.path.join(prefix, "mapped/unsorted"))
-    dirs.append(os.path.join(prefix, "deduplicated"))
-    dirs.append(os.path.join(prefix, "qc/biotypes"))
-    return " ".join(dirs)
+    return targets
diff --git a/workflow/rules/coverage.smk b/workflow/rules/coverage.smk
new file mode 100644
index 0000000..4a042ec
--- /dev/null
+++ b/workflow/rules/coverage.smk
@@ -0,0 +1,25 @@
+# --------------------------------------------------------------
+# module to generate normalized coverage tracks using deeptools
+# Note: deeptools implements strand selection in a counter-intuitive way:
+# when extracting coverage files with deeptools,
+# the strand must always be switched: forward == reverse!
+# --------------------------------------------------------------
+rule deeptools_coverage:
+    input:
+        bam="results/{step}/{sample}.bam",
+        bai="results/{step}/{sample}.bam.bai",
+    output:
+        bw="results/{step}/{sample}_cpm_{strand}.bw",
+    threads: max(1, int(workflow.cores * 0.25))
+    params:
+        effective_genome_size=config["deeptools"]["genome_size"],
+        extra=lambda wc: (
+            config["deeptools"]["extra"]
+            + f" --filterRNAstrand {('reverse' if (config['libtype'] == 'sense' and wc.strand == 'plus') or (config['libtype'] == 'antisense' and wc.strand == 'minus') else 'forward')}"
+        ),
+    log:
+        "results/{step}/log/{sample}_cpm_{strand}.log",
+    message:
+        "--- Generate normalized coverage files using deeptools."
+    wrapper:
+        "v7.0.0/bio/deeptools/bamcoverage"
diff --git a/workflow/rules/coverage_tracks.smk b/workflow/rules/coverage_tracks.smk
deleted file mode 100644
index a88b9a7..0000000
--- a/workflow/rules/coverage_tracks.smk
+++ /dev/null
@@ -1,48 +0,0 @@
-# --------------------------------------------------------------
-# module to generate normalized coverage tracks 
using deeptools -# Note: deeptools implements the strand selection in the counter intuitive way: -# This means, that when using deeptools for extracting coverage files, -# strand must always be switched. forward == reverse! -# -------------------------------------------------------------- -rule normalize_bw: - input: - get_bigwig_input, - output: - "results/{step}/wig_normalized/{sample}_cpm_{strand}.bw", - conda: - "../envs/deeptools.yml" - message: - """--- Generate normalized bigwig coverage file.""" - log: - log="results/{step}/wig_normalized/log/{sample}_cpm_{strand}.log", - stderr="results/{step}/wig_normalized/log/{sample}_cpm_{strand}.stderr", - params: - bin=lambda wc: config["deeptools"]["bin_size"], - norm=lambda wc: config["deeptools"]["normalize"], - defaults=lambda wc: ( - " ".join( - [config["deeptools"]["defaults"], config["deeptools"]["paired_end"]] - ) - if is_paired_end_experiment - else config["deeptools"]["defaults"] - ), - libtype=lambda wc: config["libtype"], - strand=lambda wc: wc.strand, - threads: int(workflow.cores * 0.2) # assign 20% of max cores - shell: - "if [ {params.libtype} == 'sense' ] && [ {params.strand} == 'plus' ]; then " - "strand=`echo -e 'reverse'`; " - "elif [ {params.libtype} == 'antisense' ] && [ {params.strand} == 'plus' ]; then " - "strand=`echo -e 'forward'`; " - "elif [ {params.libtype} == 'sense' ] && [ {params.strand} == 'minus' ]; then " - "strand=`echo -e 'forward'`; " - "else strand=`echo -e 'reverse'`; " - "fi; " - "bamCoverage --bam {input} " - "--binSize {params.bin} " - "--normalizeUsing {params.norm} " - "--outFileFormat bigwig " - "-p {threads} " - "{params.defaults} " - "--filterRNAstrand ${{strand}} " - "-o {output} > {log.log} 2> {log.stderr}" diff --git a/workflow/rules/dedup.smk b/workflow/rules/dedup.smk index b7c9a51..5c53a84 100644 --- a/workflow/rules/dedup.smk +++ b/workflow/rules/dedup.smk @@ -1,160 +1,116 @@ -# --------------------------------------------------------------- -# module to extract 
umis - umi is located in separate fastq file -# --------------------------------------------------------------- -if is_single_end_experiment and is_rnaseq_neb_umi: - - rule umi_extract: - input: - get_umi_input, - output: - fastq="results/umi_extract/{sample}.fastq.gz", - conda: - "../envs/umitools.yml" - log: - path="results/umi_extract/log/{sample}.log", - message: - """--- Extracting UMIs.""" - script: - "../scripts/extract_umis.py" - - -if is_paired_end_experiment and is_rnaseq_neb_umi: - - rule umi_extract_pe: - input: - get_umi_input, - output: - R1="results/umi_extract/{sample}_R1.fastq.gz", - R2="results/umi_extract/{sample}_R2.fastq.gz", - conda: - "../envs/umitools.yml" - log: - path="results/umi_extract/log/{sample}.log", - message: - """--- Extracting UMIs.""" - script: - "../scripts/extract_umis.py" - - -# --------------------------------------------------------------- -# module to extract umis - umi is located in read sequence -# --------------------------------------------------------------- -if is_single_end_experiment and (is_rnaseq_mpusp_custom or is_rnaseq_nextflex): - - rule umi_extract: - input: - fastq=get_umi_input, - output: - fastq="results/umi_extract/{sample}.fastq.gz", - conda: - "../envs/umitools.yml" - message: - """--- Extracting UMIs.""" - params: - method=config["umi_extraction"]["method"], - pattern=config["umi_extraction"]["pattern"], - log: - path="results/umi_extract/log/{sample}.log", - error="results/umi_extract/log/{sample}.err", - shell: - "umi_tools extract " - "--extract-method={params.method} " - "{params.pattern} " - "--stdin {input.fastq} " - "--stdout {output.fastq} " - "--log={log.path} 2> {log.error}" - - -if is_paired_end_experiment and (is_rnaseq_mpusp_custom or is_rnaseq_nextflex): - - rule umi_extract_pe: - input: - fastqs=get_umi_input, - output: - R1="results/umi_extract/{sample}_R1.fastq.gz", - R2="results/umi_extract/{sample}_R2.fastq.gz", - conda: - "../envs/umitools.yml" - message: - """--- Extracting 
UMIs.""" - params: - method=config["umi_extraction"]["method"], - pattern=config["umi_extraction"]["pattern"], - log: - path="results/umi_extract/log/{sample}.log", - error="results/umi_extract/log/{sample}.err", - shell: - "umi_tools extract " - "--extract-method={params.method} " - "{params.pattern} " - "--stdin={input.fastqs[0]} " - "--read2-in={input.fastqs[1]} " - "--stdout={output.R1} " - "--read2-out={output.R2} " - "--log={log.path} 2> {log.error}" - - -# --------------------------------------------- -# module to deduplicate reads via UMIs -# --------------------------------------------- -if is_single_end_experiment: - - rule umi_dedup: - input: - bam="results/mapped/{sample}.bam", - bai="results/mapped/{sample}.bam.bai", - output: - bam="results/deduplicated/{sample}.bam", - bai="results/deduplicated/{sample}.bam.bai", - conda: - "../envs/umitools.yml" - message: - """--- UMI tools deduplication.""" - params: - tmp="results/deduplicated/sort_{sample}_tmp", - default=config["umi_dedup"], - log: - path="results/deduplicated/log/{sample}.log", - stderr="results/deduplicated/log/{sample}.stderr", - stats="results/deduplicated/log/{sample}_umi_stats.txt", - threads: int(workflow.cores * 0.2) # assign 25% of max cores. 
- shell: - "umi_tools dedup " - "{params.default} " - "--stdin={input.bam} " - "--output-stats={log.stats} " - "--log={log.path} 2> {log.stderr} | " - "samtools sort -@ {threads} -O bam -T {params.tmp} -o {output.bam}; " - "samtools index {output.bam}" - - -if is_paired_end_experiment: - - rule umi_dedup_pe: - input: - bam="results/mapped/{sample}.bam", - bai="results/mapped/{sample}.bam.bai", - output: - bam="results/deduplicated/{sample}.bam", - bai="results/deduplicated/{sample}.bam.bai", - conda: - "../envs/umitools.yml" - message: - """--- UMI tools deduplication.""" - params: - tmp="results/deduplicated/sort_{sample}_tmp", - default=config["umi_dedup"], - log: - path="results/deduplicated/log/{sample}.log", - stderr="results/deduplicated/log/{sample}.stderr", - stats="results/deduplicated/log/{sample}_umi_stats.txt", - threads: int(workflow.cores * 0.2) # assign 25% of max cores. - shell: - "umi_tools dedup " - "--paired " - "{params.default} " - "--stdin={input.bam} " - "--output-stats={log.stats} " - "--log={log.path} 2> {log.stderr} | " - "samtools sort -@ {threads} -O bam -T {params.tmp} -o {output.bam}; " - "samtools index {output.bam}" +rule get_fastq: + input: + get_fastq, + output: + fastq="results/get_fastq/{sample}_{read}.fastq.gz", + conda: + "../envs/base.yml" + message: + "obtaining fastq files" + log: + "results/get_fastq/log/{sample}_{read}.log", + shell: + "ln -s {input} {output.fastq};" + "echo 'made symbolic link from {input} to {output.fastq}' > {log}" + + +rule umi_extract_standard: + input: + fq1="results/get_fastq/{sample}_read1.fastq.gz", + fq2="results/get_fastq/{sample}_read2.fastq.gz" if is_paired_end() else [], + output: + fq1="results/umi_extract/{sample}_read1.fastq.gz", + fq2="results/umi_extract/{sample}_read2.fastq.gz" if is_paired_end() else [], + conda: + "../envs/umitools.yml" + message: + "--- Extracting UMIs from read." 
+ params: + method=config["umi_extraction"]["method"], + pattern=config["umi_extraction"]["pattern"], + read2=( + lambda wildcards, input, output: ( + f"--read2-in {input.fq2} --read2-out {output.fq2}" if input.fq2 else "" + ) + ), + log: + path="results/umi_extract/log/{sample}.log", + error="results/umi_extract/log/{sample}.err", + shell: + """ + if [[ "{params.method}" != "none" ]]; then + umi_tools extract \ + --extract-method {params.method} \ + {params.pattern} \ + --stdin {input.fq1} \ + --stdout {output.fq1} \ + {params.read2} \ + --log {log.path} 2> {log.error} + else + cp {input.fq1} {output.fq1} + if [[ -n "{params.read2}" ]]; then + cp {input.fq2} {output.fq2} + fi + fi + """ + + +rule umi_extract_separate: + input: + fq1="results/get_fastq/{sample}_read1.fastq.gz", + fq2="results/get_fastq/{sample}_read2.fastq.gz" if is_paired_end() else [], + fqumi="results/get_fastq/{sample}_readumi.fastq.gz", + output: + fq1="results/umi_extract_separate/{sample}_read1.fastq.gz", + fq2=( + "results/umi_extract_separate/{sample}_read2.fastq.gz" + if is_paired_end() + else [] + ), + conda: + "../envs/umitools.yml" + log: + "results/umi_extract_separate/log/{sample}.log", + message: + "--- Extracting UMIs from separate Fastq file." + script: + "../scripts/extract_umis.py" + + +rule umi_dedup: + input: + bam="results/mapped/{sample}.bam", + bai="results/mapped/{sample}.bam.bai", + output: + bam="results/deduplicated/{sample}.bam", + bai="results/deduplicated/{sample}.bam.bai", + conda: + "../envs/umitools.yml" + message: + "--- UMI tools deduplication." 
+ params: + method=config["umi_extraction"]["method"], + tmp="results/deduplicated/sort_{sample}_tmp", + paired="--paired " if is_paired_end() else "", + default=config["umi_dedup"], + log: + path="results/deduplicated/log/{sample}.log", + stderr="results/deduplicated/log/{sample}.stderr", + stats="results/deduplicated/log/{sample}_umi_stats.txt", + threads: max(1, int(workflow.cores * 0.25)) + shell: + """ + if [[ "{params.method}" != "none" ]]; then + umi_tools dedup \ + {params.paired} \ + {params.default} \ + --stdin={input.bam} \ + --output-stats={log.stats} \ + --log={log.path} 2> {log.stderr} | + samtools sort -@ {threads} -O bam -T {params.tmp} -o {output.bam}; + samtools index {output.bam}; + else + samtools sort -@ {threads} -O bam -T {params.tmp} -o {output.bam} {input.bam}; + samtools index {output.bam}; + fi + """ diff --git a/workflow/rules/mapping.smk b/workflow/rules/mapping.smk index 41ce6fc..e8bf772 100644 --- a/workflow/rules/mapping.smk +++ b/workflow/rules/mapping.smk @@ -1,113 +1,93 @@ -# ----------------------------------------------------- -# module to fetch genome from NCBI or Ensemble -# ----------------------------------------------------- rule get_genome: + input: + fasta=lambda wildcards: ( + config["get_genome"]["fasta"] + if config["get_genome"]["database"] == "manual" + else [] + ), + gff=lambda wildcards: ( + config["get_genome"]["gff"] + if config["get_genome"]["database"] == "manual" + else [] + ), output: - path=directory("results/genome"), fasta="results/genome/genome.fasta", gff="results/genome/genome.gff", - conda: - "../envs/get_genome.yml" + fai="results/genome/genome.fasta.fai", message: - """--- Parsing genome GFF and FASTA files.""" + "--- Parsing genome GFF and FASTA files." 
params: database=config["get_genome"]["database"], assembly=config["get_genome"]["assembly"], - fasta=config["get_genome"]["fasta"], - gff=config["get_genome"]["gff"], + gff_source_types=config["get_genome"]["gff_source_type"], log: - path="results/genome/log/get_genome.log", - script: - "../scripts/get_genome.py" + "results/genome/get_genome.log", + wrapper: + "https://raw.githubusercontent.com/MPUSP/mpusp-snakemake-wrappers/refs/heads/main/get_genome" -# ----------------------------------------------------- -# module to map reads to ref genome using STAR aligner -# ----------------------------------------------------- -rule create_star_index: +rule star_index: input: - genome="results/genome/genome.fasta", + fasta=rules.get_genome.output.fasta, output: - path=directory("results/genome/index"), - conda: - "../envs/star.yml" - message: - """--- STAR index creation.""" + directory("results/mapped/index/"), + threads: 1 params: - index=config["star"]["index"], - indexNbases=config["star"]["genomeSAindexNbases"], + extra=config["star"]["index"], log: - path="results/genome/log/star_index.log", - shell: - "if [ {params.index} == None ]; then " - "mkdir {output.path};" - "STAR --runMode genomeGenerate " - "--genomeDir {output.path} " - "--genomeFastaFiles {input.genome} " - "--genomeSAindexNbases {params.indexNbases} > {log.path}; " - "rm -f ./Log.out; " - "else " - "ln -s {params.index} {output.path}; " - "echo 'made symbolic link from {params.index} to {output.path}' > {log.path}; " - "fi;" + "results/mapped/index/index.log", + message: + "--- Create STAR index." 
+ wrapper: + "v7.2.0/bio/star/index" -# ----------------------------------------------------- -# module to map reads to ref genome using STAR aligner -# ----------------------------------------------------- rule star_mapping: input: - fastqs=get_mapping_input, - genome=rules.create_star_index.output, + fq1="results/fastp/{sample}_read1.fastq.gz", + fq2="results/fastp/{sample}_read2.fastq.gz" if is_paired_end() else [], + idx=rules.star_index.output, output: - bam="results/mapped/unsorted/{sample}.bam", - conda: - "../envs/star.yml" + aln="results/mapped/unsorted/{sample}/mapped.bam", + log_final="results/mapped/unsorted/{sample}/Log.final.out", + log: + "results/mapped/unsorted/{sample}/star.log", message: - """--- STAR mapping.""" + "--- STAR mapping." params: - default=config["star"]["default"], - multi=config["star"]["multi"], - sam_multi=config["star"]["sam_multi"], - intron_max=config["star"]["intron_max"], - outprefix=lambda w, output: f"{os.path.splitext(output.bam)[0]}_", - input_str=lambda w, input: ( - " ".join(input.fastqs) if len(input.fastqs) == 2 else input.fastqs - ), + extra=config["star"]["extra"], + threads: max(1, int(workflow.cores * 0.25)) + wrapper: + "v7.2.0/bio/star/align" + + +rule samtools_sort: + input: + "results/mapped/unsorted/{sample}/mapped.bam", + output: + "results/mapped/{sample}.bam", log: "results/mapped/log/{sample}.log", - threads: int(workflow.cores * 0.2) if int(workflow.cores * 0.2) >= 1 else 1 # assign 20% of max cores. - shell: - "STAR " - "--runThreadN {threads} " - "--genomeDir {input.genome} " - "--readFilesIn {params.input_str} " - "{params.default} " - "--outFilterMultimapNmax {params.multi} " - "--alignIntronMax {params.intron_max} " - "--outSAMmultNmax {params.sam_multi} " - "--outFileNamePrefix {params.outprefix} " - "> {output.bam} 2> {log}" + message: + "--- Sort reads after mapping." 
+ params: + extra=config["samtools"]["sort"], + threads: max(1, int(workflow.cores * 0.25)) + wrapper: + "v7.0.0/bio/samtools/sort" -# --------------------------------------------------- -# module to sort and index bam file using samtools -# --------------------------------------------------- -rule mapping_sorted_bam: +rule samtools_index: input: - bam=rules.star_mapping.output.bam, + "results/mapped/{sample}.bam", output: - bam="results/mapped/{sample}.bam", - bai="results/mapped/{sample}.bam.bai", - conda: - "../envs/samtools.yml" + "results/mapped/{sample}.bam.bai", log: - "results/mapped/log/samtools_{sample}.log", + "results/mapped/log/{sample}_index.log", message: - """--- Samtools sort and index bam files.""" + "--- Index reads." params: - tmp="results/mapped/sort_{sample}_tmp", - threads: int(workflow.cores * 0.2) # assign 20% of max cores - shell: - "samtools sort -@ {threads} -O bam -T {params.tmp} -o {output.bam} {input.bam} &> {log}; " - "samtools index -@ {threads} {output.bam} 2>> {log}" + extra=config["samtools"]["index"], + threads: max(1, int(workflow.cores * 0.25)) + wrapper: + "v7.0.0/bio/samtools/index" diff --git a/workflow/rules/qc.smk b/workflow/rules/qc.smk index c648ed8..1c6003b 100644 --- a/workflow/rules/qc.smk +++ b/workflow/rules/qc.smk @@ -1,85 +1,36 @@ -# ----------------------------------------------------- -# modules to make fastqc report -# ----------------------------------------------------- -if is_single_end_experiment: - - rule fastqc: - input: - fastq=get_qc_input, - output: - report=directory("results/qc/{status}_reads/{sample}"), - conda: - "../envs/fastqc.yml" - message: - """--- Checking fastq files with FastQC.""" - log: - "results/qc/{status}_reads/log/{sample}.log", - threads: int(workflow.cores * 0.2) # assign 20% of max cores - shell: - "mkdir -p {output.report}; " - "fastqc --nogroup --threads {threads} -o {output.report} -q {input.fastq} > {log}" - - -if is_paired_end_experiment: - - rule fastqc: - input: - 
fastqs=get_qc_input, - output: - report=directory("results/qc/{status}_reads/{sample}"), - conda: - "../envs/fastqc.yml" - message: - """--- Checking fastq files with FastQC.""" - log: - "results/qc/{status}_reads/log/{sample}.log", - threads: int(workflow.cores * 0.2) # assign 20% of max cores - shell: - "mkdir -p {output.report}; " - "fastqc --nogroup --threads {threads} -o {output.report} -q {input.fastqs[0]} > {log}; " - "fastqc --nogroup --threads {threads} -o {output.report} -q {input.fastqs[1]} &> {log}" - - -# ----------------------------------------------------- -# module to determine alignment stats -# ----------------------------------------------------- -rule alignment_stats: +rule fastqc: input: - stats=get_stats_input, + fastq="results/get_fastq/{sample}_{read}.fastq.gz", output: - "results/qc/{step}_alignment/{sample}.flagstat", - conda: - "../envs/samtools.yml" - log: - "results/qc/{step}_alignment/log/samtools_{sample}.log", + html="results/fastqc/{sample}_{read}_fastqc.html", + zip="results/fastqc/{sample}_{read}_fastqc.zip", + params: + extra=config["fastqc"]["extra"], message: - """--- Generate mapping statistics of BAM file using samtools.""" - threads: int(workflow.cores * 0.2) # assign 20% of max cores - shell: - "samtools flagstat -@ {threads} {input.stats} > {output} 2> {log}" + "--- Checking fastq files with FastQC" + log: + "results/fastqc/log/{sample}_{read}.log", + threads: max(1, int(workflow.cores * 0.25)) + resources: + mem_mb=4096, + wrapper: + "v6.0.0/bio/fastqc" -# ----------------------------------------------------- -# module to qc quantified biotypes by featureCounts -# ----------------------------------------------------- -rule qc_biotype_summary: +rule alignment_stats: input: - "results/quantify_biotypes/{sample}.counts.summary", + "results/{step}/{sample}.bam", output: - "results/qc/biotypes/{sample}.counts.summary", - conda: - "../envs/feature_counts.yml" + "results/qc/{step}/{sample}.flagstat", log: - 
"results/qc/biotypes/log/copy_{sample}.log", + "results/qc/{step}/log/{sample}_flagstat.log", message: - """--- Copy biotype quantification summary.""" - shell: - "cp {input} {output} &> {log}" + "--- Generate mapping statistics of BAM file using samtools." + threads: max(1, int(workflow.cores * 0.25)) + wrapper: + "v7.5.0/bio/samtools/flagstat" -# ----------------------------------------------------- -# module to plot biotype distribution -# ----------------------------------------------------- rule qc_biotype_barplot: input: table="results/quantify_biotypes/all_samples_biotype_counts.tsv", @@ -91,21 +42,18 @@ rule qc_biotype_barplot: log: path="results/qc/biotypes/log/extract_biotype_data.log", message: - """--- Generate multiqc barplot data for biotype distribution.""" + "--- Generate multiqc barplot data for biotype distribution." script: "../scripts/plot_biotypes.py" -# ----------------------------------------------------- -# module to extract software versions from conda envs -# ----------------------------------------------------- rule get_conda_envs: output: "results/versions/log_conda_envs.txt", conda: "../envs/base.yml" message: - """--- Extract software version from conda envs.""" + "--- Extract software version from conda envs." 
params: conda_files=" ".join(get_conda_envs_files()), conda_envs_log=workflow.source_path("../../resources/conda_envs/conda_envs.log"), @@ -119,19 +67,16 @@ rule get_conda_envs: "fi;" -# ----------------------------------------------------- -# module to generate software version yaml file MultiQC -# ----------------------------------------------------- rule get_software_yaml: input: conda_envs="results/versions/log_conda_envs.txt", output: - yaml="results/versions/rnaseq_preprocessinq_mqc_versions.yml", - multi_conf="results/qc/multiqc/multiqc_config.yml", + yaml="results/versions/rnaseq_preprocessing_mqc_versions.yml", + multi_conf="results/multiqc/multiqc_config.yml", conda: "../envs/base.yml" message: - """--- Generate software version yaml file for MultiQC.""" + "--- Generate software version yaml file for MultiQC." log: path="results/versions/log/yaml_versions.log", params: @@ -140,31 +85,17 @@ rule get_software_yaml: "../scripts/get_versions.py" -# ----------------------------------------------------- -# module to run multiQC on input + processed files -# ----------------------------------------------------- rule multiqc: input: - construct_multiqc_input(), - config="results/qc/multiqc/multiqc_config.yml", + get_multiqc_input, + config="results/multiqc/multiqc_config.yml", output: - report="results/qc/multiqc/multiqc_report.html", - final="results/report/multiqc_report.html", - conda: - "../envs/multiqc.yml" + report="results/multiqc/multiqc_report.html", + params: + extra=config["multiqc"]["extra"], message: - """--- Generating MultiQC report for seq data.""" + "--- Generating MultiQC report for seq data" log: - path="results/qc/multiqc/log/multiqc.log", - params: - defaults=config["multiqc"]["defaults"], - outdir=lambda w, output: os.path.split(output.report)[0], - filename=lambda w, output: os.path.split(output.report)[1], - qc_dirs=define_multiqc_dirs(), - shell: - "multiqc {params.defaults} " - "--config {input.config} " - "--outdir {params.outdir} " - 
"--filename {params.filename} " - "--dirs {params.qc_dirs} &> {log.path}; " - "cp {output.report} {output.final}" + "results/multiqc/log/multiqc.log", + wrapper: + "v7.5.0/bio/multiqc" diff --git a/workflow/rules/quantification.smk b/workflow/rules/quantification.smk index bfb83ed..db1ee6a 100644 --- a/workflow/rules/quantification.smk +++ b/workflow/rules/quantification.smk @@ -1,6 +1,3 @@ -# ----------------------------------------------------- -# module to extract selected biotypes from gff file -# ----------------------------------------------------- rule extract_features: input: gff="results/genome/genome.gff", @@ -9,8 +6,9 @@ rule extract_features: conda: "../envs/extract_features.yml" message: - """--- Extract selected biotype features from genome annotation.""" + "--- Extract selected biotype features from genome annotation." params: + gff_source_types=config["get_genome"]["gff_source_type"], features=config["extract_features"]["biotypes"], log: path="results/extracted_features/log/extract_features.log", @@ -18,9 +16,6 @@ rule extract_features: "../scripts/extract_features.py" -# ----------------------------------------------------- -# module to generate gtf file from gff -# ----------------------------------------------------- rule gff2gtf: input: gff="results/extracted_features/biotypes.gff", @@ -29,89 +24,53 @@ rule gff2gtf: conda: "../envs/extract_features.yml" message: - """--- gff to gtf conversion.""" + "--- gff to gtf conversion." 
    log:
        path="results/extracted_features/log/gff2gtf.log",
    script:
        "../scripts/gff2gtf.py"


-# -----------------------------------------------------
-# module to generate gtf file from gff
-# -----------------------------------------------------
-if is_single_end_experiment:
-
-    rule quantify_biotypes:
-        input:
-            bam="results/deduplicated/{sample}.bam",
-            gtf="results/extracted_features/biotypes.gtf",
-        output:
-            counts="results/quantify_biotypes/{sample}.counts",
-            summary="results/quantify_biotypes/{sample}.counts.summary",
-        conda:
-            "../envs/feature_counts.yml"
-        message:
-            """--- Quantify biotpyes with subread's featureCount."""
-        log:
-            path="results/quantify_biotypes/log/feature_counts_{sample}.log",
-        threads: min(max(1, int(workflow.cores * 0.2)), 64)  # assign 20% of max cores
-        params:
-            defaults=config["feature_counts"]["defaults"],
-            libtype=config["libtype"],
-        shell:
-            "if [ {params.libtype} == 'sense' ]; then "
-            "libtype=`echo -e '-s 1'`; "
-            "else libtype=`echo -e '-s 2'`; "
-            "fi; "
-            "featureCounts -T {threads} "
-            "{params.defaults} "
-            "${{libtype}} "
-            "-a {input.gtf} "
-            "-o {output.counts} "
-            "{input.bam} &> {log.path}"
-
-
-if is_paired_end_experiment:
-
-    rule quantify_biotypes:
-        input:
-            bam="results/deduplicated/{sample}.bam",
-            gtf="results/extracted_features/biotypes.gtf",
-        output:
-            counts="results/quantify_biotypes/{sample}.counts",
-            summary="results/quantify_biotypes/{sample}.counts.summary",
-        conda:
-            "../envs/feature_counts.yml"
-        message:
-            """--- Quantify biotpyes with subread's featureCount."""
-        log:
-            path="results/quantify_biotypes/log/feature_counts_{sample}.log",
-        threads: min(max(1, int(workflow.cores * 0.2)), 64)  # assign 20% of max cores
-        params:
-            defaults=config["feature_counts"]["defaults"],
-            libtype=config["libtype"],
-        shell:
-            "if [ {params.libtype} == 'sense' ]; then "
-            "libtype=`echo -e '-s 1'`; "
-            "else libtype=`echo -e '-s 2'`; "
-            "fi; "
-            "featureCounts -T {threads} "
-            "{params.defaults} "
-            "${{libtype}} "
-            "-a {input.gtf} "
-            "-p --countReadPairs "
-            "-o {output.counts} "
-            "{input.bam} &> {log.path}"
+rule quantify_biotypes:
+    input:
+        bam="results/deduplicated/{sample}.bam",
+        gtf="results/extracted_features/biotypes.gtf",
+    output:
+        counts="results/qc/biotypes/{sample}.counts",
+        summary="results/qc/biotypes/{sample}.counts.summary",
+    conda:
+        "../envs/feature_counts.yml"
+    message:
+        "--- Quantify biotypes with subread's featureCounts."
+    log:
+        path="results/qc/biotypes/log/{sample}.counts.log",
+    threads: max(1, int(workflow.cores * 0.25))
+    params:
+        defaults=config["feature_counts"]["defaults"],
+        libtype=config["libtype"],
+        paired="-p --countReadPairs" if is_paired_end() else "",
+    shell:
+        """
+        if [ {params.libtype} = 'sense' ]; then
+            libtype='-s 1';
+        else
+            libtype='-s 2';
+        fi;
+        featureCounts -T {threads} \
+        {params.defaults} \
+        ${{libtype}} \
+        -a {input.gtf} \
+        {params.paired} \
+        -o {output.counts} \
+        {input.bam} &> {log.path}
+        """


-# -----------------------------------------------------------
-# module to combine count tables and add feature information
-# -----------------------------------------------------------
-rule combine_count_tables:
+rule merge_counts:
    input:
-        counts=expand("results/quantify_biotypes/{sample}.counts", sample=samples.index),
+        counts=expand("results/qc/biotypes/{sample}.counts", sample=samples.index),
        summary=expand(
-            "results/quantify_biotypes/{sample}.counts.summary", sample=samples.index
+            "results/qc/biotypes/{sample}.counts.summary", sample=samples.index
        ),
        gtf="results/extracted_features/biotypes.gtf",
    output:
@@ -119,7 +78,7 @@
    conda:
        "../envs/quantify_biotypes.yml"
    message:
-        """--- Combine count tables for all samples."""
+        "--- Combine count tables for all samples."
    log:
        path="results/quantify_biotypes/log/merge_counts.log",
    params:
@@ -128,9 +87,6 @@
        "../scripts/merge_counts.py"


-# -----------------------------------------------------------
-# module to summarize biotype distributions
-# -----------------------------------------------------------
rule summarize_biotypes:
    input:
        table="results/quantify_biotypes/all_samples_counts.tsv",
    output:
@@ -140,7 +96,7 @@
    conda:
        "../envs/quantify_biotypes.yml"
    message:
-        """--- Extract fraction of biotypes for all samples."""
+        "--- Extract fraction of biotypes for all samples."
    log:
        path="results/quantify_biotypes/log/summarize_biotypes.log",
    params:
diff --git a/workflow/rules/trim.smk b/workflow/rules/trim.smk
index 71bf164..8ce1ae7 100644
--- a/workflow/rules/trim.smk
+++ b/workflow/rules/trim.smk
@@ -1,116 +1,21 @@
-if is_single_end_experiment:
-
-    # -----------------------------------------------------
-    # module to trim adapters from reads
-    # -----------------------------------------------------
-    rule cutadapt:
-        input:
-            fastq=get_trimming_input,
-        output:
-            "results/clipped/{sample}.fastq.gz",
-        conda:
-            "../envs/cutadapt.yml"
-        message:
-            """--- Trim adapters from reads."""
-        params:
-            adapter_R1=config["cutadapt"]["read1_adapter"],
-            default=config["cutadapt"]["default"],
-        log:
-            path="results/clipped/log/{sample}.log",
-        threads: int(workflow.cores * 0.4)  # assign 40% of max cores
-        shell:
-            "cutadapt --cores {threads} "
-            "-a {params.adapter_R1} "
-            "{params.default} "
-            "-o {output} "
-            "{input.fastq} &> {log.path}"
-
-
-if is_single_end_experiment and is_rnaseq_nextflex:
-
-    # -----------------------------------------------------
-    # module to remove 4nt from 3' end of reads
-    # -----------------------------------------------------
-    rule truncate_fastq:
-        input:
-            fastq=get_trunc_input,
-        output:
-            "results/trunc_fastq/{sample}.fastq.gz",
-        conda:
-            "../envs/cutadapt.yml"
-        message:
-            """--- Trim 4nt from 3' end of reads."""
-        log:
-            path="results/trunc_fastq/log/{sample}.log",
-        params:
-            default=config["trunc_fastq"]["default"],
-        threads: int(workflow.cores * 0.4)  # assign 40% of max cores
-        shell:
-            "cutadapt --cores {threads} "
-            "{params.default} "
-            "-o {output} "
-            "{input.fastq} &> {log.path}"
-
-
-if is_paired_end_experiment:
-
-    # -----------------------------------------------------
-    # module to trim adapters from paired-end reads
-    # -----------------------------------------------------
-    rule cutadapt_pe:
-        input:
-            fastqs=get_trimming_input,
-        output:
-            R1="results/clipped/{sample}_R1.fastq.gz",
-            R2="results/clipped/{sample}_R2.fastq.gz",
-        conda:
-            "../envs/cutadapt.yml"
-        message:
-            """--- Trim adapters from reads."""
-        log:
-            path="results/clipped/log/{sample}.log",
-        params:
-            adapter_R1=config["cutadapt"]["read1_adapter"],
-            adapter_R2=config["cutadapt"]["read2_adapter"],
-            default=config["cutadapt"]["default"],
-            input_str=lambda w, input: (
-                " ".join(input.fastqs) if len(input.fastqs) == 2 else input.fastqs
-            ),
-        threads: int(workflow.cores * 0.4)  # assign 40% of max cores
-        shell:
-            "cutadapt --cores {threads} "
-            "-a {params.adapter_R1} "
-            "-A {params.adapter_R2} "
-            "{params.default} "
-            "-o {output.R1} -p {output.R2} "
-            "{params.input_str} &> {log.path}"
-
-
-if is_paired_end_experiment and is_rnaseq_nextflex:
-
-    # -----------------------------------------------------
-    # module to remove 4nt from 3' end of paired-end reads
-    # -----------------------------------------------------
-    rule truncate_fastq_pe:
-        input:
-            fastqs=get_trunc_input,
-        output:
-            R1="results/trunc_fastq/{sample}_R1.fastq.gz",
-            R2="results/trunc_fastq/{sample}_R2.fastq.gz",
-        conda:
-            "../envs/cutadapt.yml"
-        message:
-            """--- Trim 4nt from 3' end of reads."""
-        log:
-            path="results/trunc_fastq/log/{sample}.log",
-        params:
-            default=config["trunc_fastq"]["default"],
-            input_str=lambda w, input: (
-                " ".join(input.fastqs) if len(input.fastqs) == 2 else input.fastqs
-            ),
-        threads: int(workflow.cores * 0.4)  # assign 40% of max cores
-        shell:
-            "cutadapt --cores {threads} "
-            "{params.default} "
-            "-o {output.R1} -p {output.R2} "
-            "{params.input_str} &> {log.path}"
+rule fastp:
+    input:
+        sample=get_fastq_pairs,
+    output:
+        html="results/fastp/{sample}.html",
+        json="results/fastp/{sample}.json",
+        trimmed=expand(
+            "results/fastp/{{sample}}_{read}.fastq.gz",
+            read=["read1", "read2"] if is_paired_end() else ["read1"],
+        ),
+    log:
+        "results/fastp/log/{sample}.log",
+    message:
+        "--- Trim and QC-filter reads with fastp."
+    params:
+        extra=config["fastp"]["extra"],
+    threads: max(1, int(workflow.cores * 0.25))
+    resources:
+        mem_mb=4096,
+    wrapper:
+        "v7.0.0/bio/fastp"
diff --git a/workflow/scripts/extract_features.py b/workflow/scripts/extract_features.py
index 06f3395..5b13b32 100644
--- a/workflow/scripts/extract_features.py
+++ b/workflow/scripts/extract_features.py
@@ -17,6 +17,14 @@
log = []
error = []

+default_sources = [
+    {"RefSeq": "gene"},
+    {"RefSeq": "pseudogene"},
+    {"RefSeq": "CDS"},
+    {"Protein Homology": "CDS"},
+]
+gff_source_types = snakemake.params.get("gff_source_types", default_sources)
+
if not path.exists(input_gff):
    error += ["The parameter 'gff' is not a valid path to a GFF file"]

@@ -25,9 +33,10 @@
try:
    with open(input_gff, "r") as gff_file:
        examiner = GFFExaminer()
-        limits = dict(
-            gff_source_type=[("RefSeq", "gene"), ("RefSeq", "pseudogene")]
-        )
+        gff_source_type = []
+        for i in gff_source_types:
+            gff_source_type += list(i.items())
+        limits = dict(gff_source_type=gff_source_type)
        for rec in GFF.parse(gff_file, limit_info=limits):
            sel_features = []
            for feat in rec.features:
diff --git a/workflow/scripts/extract_umis.py b/workflow/scripts/extract_umis.py
index 2f10ce4..de64f38 100644
--- a/workflow/scripts/extract_umis.py
+++ b/workflow/scripts/extract_umis.py
@@ -33,34 +33,27 @@ def extract_umi(R1, R2, output, log, error):
# set input/output parameters
# ---------------------------
-is_paired_end = False
-
-# paired-end
-if len(snakemake.input) > 2:
-    is_paired_end = True
-    file_r1 = snakemake.input[0]
-    file_r2 = snakemake.input[1]
-    file_umis = snakemake.input[2]
-    output_r1 = snakemake.output["R1"]
-    output_r2 = snakemake.output["R2"]
-# single-end
-else:
-    file_r1 = snakemake.input[0]
-    file_umis = snakemake.input[1]
-    output_r1 = snakemake.output["fastq"]
-
-
-output_log = snakemake.log["path"]
+output_log = snakemake.log[0]
log = []
error = []
-if is_paired_end:
+# paired-end
+if len(snakemake.input) > 2:
+    file_r1 = snakemake.input["fq1"]
+    file_r2 = snakemake.input["fq2"]
+    file_umis = snakemake.input["fqumi"]
+    output_r1 = snakemake.output["fq1"]
+    output_r2 = snakemake.output["fq2"]
    log += ["Processing R1 ..."]
    extract_umi(R1=file_r1, R2=file_umis, output=output_r1, log=log, error=error)
-    # need to write R2 as well
    log += ["Processing R2 ..."]
    extract_umi(R1=file_r2, R2=file_umis, output=output_r2, log=log, error=error)
+# single-end
else:
+    file_r1 = snakemake.input["fq1"]
+    file_umis = snakemake.input["fqumi"]
+    output_r1 = snakemake.output["fq1"]
    extract_umi(R1=file_r1, R2=file_umis, output=output_r1, log=log, error=error)
diff --git a/workflow/scripts/get_genome.py b/workflow/scripts/get_genome.py
deleted file mode 100644
index cc96979..0000000
--- a/workflow/scripts/get_genome.py
+++ /dev/null
@@ -1,163 +0,0 @@
-#!/usr/bin/python
-
-# GET GENOME
-# -----------------------------------------------------------------------------
-#
-# This script attempts to download genome sequence (FASTA) and
-# genome annotation (GFF / GTF) files from NCBI using the NCBI datasets
-# API, or a similar database. Also, the genome sequence is indexed using samtools.
-# Alternatively, a FASTA and GFF file can be
-# supplied by the user. Input is roughly checked for validity.
-
-from os import path
-from BCBio.GFF import GFFExaminer
-from BCBio import GFF
-from subprocess import getoutput
-
-input_database = snakemake.params["database"]
-input_assembly = snakemake.params["assembly"]
-input_fasta = snakemake.params["fasta"]
-input_gff = snakemake.params["gff"]
-output_path = snakemake.output["path"]
-output_fasta = snakemake.output["fasta"]
-output_gff = snakemake.output["gff"]
-output_log = snakemake.log["path"]
-log = []
-error = []
-
-
-def check_fasta(input_fasta, log=[], error=[]):
-    with open(input_fasta, "r") as fasta_file:
-        fasta = fasta_file.read()
-    n_items = fasta.count(">")
-    if n_items:
-        log += [f"Supplied fasta file '{input_fasta}' was found"]
-        log += [f"Supplied fasta file contains {n_items} items"]
-    else:
-        error += ["The supplied fasta file contains no valid entries starting with '>'"]
-    return fasta, log, error
-
-
-def check_gff(input_gff, log=[], error=[]):
-    with open(input_gff, "r") as gff_file:
-        gff_examiner = GFFExaminer()
-        log += [f"Supplied GFF file '{input_gff}' was found"]
-        gff_summary = gff_examiner.available_limits(gff_file)
-        log += [
-            f"Supplied GFF file contains the following items:",
-            "--------------------",
-        ]
-        for item in gff_summary["gff_source_type"]:
-            log += ["-".join(item) + " : " + str(gff_summary["gff_source_type"][item])]
-    with open(input_gff, "r") as gff_file:
-        new_gff = []
-        gff_source_type = []
-        for i in snakemake.config["get_genome"]["gff_source_type"]:
-            gff_source_type += list(i.items())
-        limits = dict(gff_source_type=gff_source_type)
-        for rec in GFF.parse(gff_file, limit_info=limits):
-            for recfeat in rec.features:
-                rec_keys = recfeat.qualifiers.keys()
-                if not "Name" in rec_keys:
-                    if "locus_tag" in rec_keys:
-                        recfeat.qualifiers["Name"] = recfeat.qualifiers["locus_tag"]
-                    else:
-                        error += [
-                            "required fields 'Name','locus_tag' missing in *.gff file"
-                        ]
-                else:
-                    if "locus_tag" in rec_keys:
-                        recfeat.qualifiers["trivial_name"] = recfeat.qualifiers["Name"]
-                        recfeat.qualifiers["Name"] = recfeat.qualifiers["locus_tag"]
-                if not "ID" in rec_keys:
-                    if "locus_tag" in rec_keys:
-                        recfeat.qualifiers["ID"] = recfeat.qualifiers["locus_tag"]
-                    elif "Name" in rec_keys:
-                        recfeat.qualifiers["ID"] = recfeat.qualifiers["Name"]
-                    else:
-                        error += [
-                            "required fields 'ID','locus_tag' missing in *.gff file"
-                        ]
-            new_gff += [rec]
-    return new_gff, log, error
-
-
-if input_database.lower() == "ncbi":
-    ncbi_result = getoutput(
-        f"datasets summary genome accession {input_assembly} --as-json-lines | "
-        + "dataformat tsv genome --fields accession,annotinfo-release-date,organism-name"
-    )
-    if ncbi_result.startswith("Error"):
-        error += [ncbi_result]
-        error += [
-            "The supplied refseq/genbank ID was not valid. Example for correct input: 'GCF_000009045.1'"
-        ]
-    elif len(ncbi_result) == 0:
-        error += [
-            "The result from fetching NCBI genome data has zero length. Please check your internet connection!"
-        ]
-    else:
-        ncbi_genome = [
-            i.split("\t")
-            for i in ncbi_result.split("\n")
-            if not (i.startswith("New version") or i.startswith("Warning"))
-        ]
-        ncbi_genome = dict(zip(ncbi_genome[0], ncbi_genome[1]))
-        log += ["Found the following genome(s):"]
-        for k in ncbi_genome.keys():
-            log += ["{0}: {1}".format(k, ncbi_genome.get(k))]
-        refseq_id = ncbi_genome.get("Assembly Accession")
-        if not refseq_id.startswith("GCF_"):
-            error += ["The RefSeq ID '{0}' has no valid format.".format(refseq_id)]
-        ncbi_command = (
-            f"datasets download genome accession {refseq_id}"
-            + f" --filename {output_path}/database.zip --include genome,gff3; "
-            + f"cd {output_path}; unzip database.zip; rm database.zip"
-        )
-        copy_command = (
-            f"cp {output_path}/ncbi_dataset/data/{refseq_id}/*.fna {output_fasta}; "
-            + f"cp {output_path}/ncbi_dataset/data/{refseq_id}/genomic.gff {output_gff}"
-        )
-        index_command = f"samtools faidx {output_fasta}"
-        cleanup_command = (
-            f"rm -rf {output_path}/ncbi_dataset; " + f"rm -f {output_path}/README.md"
-        )
-        str_out = getoutput(ncbi_command)
-        str_cp = getoutput(copy_command)
-        str_cl = getoutput(cleanup_command)
-        # import and check files
-        fasta, log, error = check_fasta(output_fasta, log, error)
-        index_out = getoutput(index_command)
-        gff, log, error = check_gff(output_gff, log, error)
-        # write cleaned gff file
-        with open(output_gff, "w") as gff_out:
-            GFF.write(gff, gff_out)
-
-elif input_database.lower() == "manual":
-    if not path.exists(input_fasta):
-        error += ["The parameter 'fasta' is not a valid path to a FASTA file"]
-    elif not path.exists(input_gff):
-        error += ["The parameter 'gff' is not a valid path to a GFF/GTF file"]
-    else:
-        # import and check files
-        fasta, log, error = check_fasta(input_fasta, log, error)
-        gff, log, error = check_gff(input_gff, log, error)
-        # export fasta and gff files
-        with open(output_fasta, "w") as fasta_out:
-            fasta_out.write(fasta)
-        with open(output_gff, "w") as gff_out:
-            GFF.write(gff, gff_out)
-else:
-    error += ["The parameter 'database' is none of 'ncbi', 'manual'"]
-
-# print error/log messages
-if error:
-    print("\n".join(error))
-    raise ValueError(
-        "Location or format of the supplied genome files was not correct, quitting"
-    )
-else:
-    log += ["Module finished successfully\n"]
-    log = ["GET_GENOME: " + i for i in log]
-    with open(output_log, "w") as log_file:
-        log_file.write("\n".join(log))
diff --git a/workflow/scripts/get_versions.py b/workflow/scripts/get_versions.py
index 8038b38..ac82ac8 100644
--- a/workflow/scripts/get_versions.py
+++ b/workflow/scripts/get_versions.py
@@ -7,8 +7,6 @@
# conda envs file.
# -----------------------------------------------------------------------------

-import os
-import pandas as pd
import yaml

@@ -33,13 +31,14 @@
    error += [f"Error occurred when processing input file '{input_envs}'."]

try:
-    with open(input_conf, "r") as input_conf:
-        config_dic = yaml.safe_load(input_conf)
+    with open(input_conf, "r") as config_file:
+        config_dic = yaml.safe_load(config_file)
except IOError:
+    config_dic = {}
    error += [f"Error occurred when reading input config '{input_conf}'."]

# software info to multiqc config
-config_dic["software_versions"] = dict(version_dic)
+config_dic["software_versions"] = version_dic

try:
    with open(output_yml, "w") as out_yml:
@@ -48,13 +47,12 @@
except IOError:
    error += [f"Output file '{output_yml}' can not be opened."]

-
try:
    with open(out_multi, "w") as multi_yml:
        log += [f"Writing output YAML file '{out_multi}'..."]
        yaml.dump(config_dic, multi_yml, default_flow_style=False)
except IOError:
-    error += [f"Output file '{output_yml}' can not be opened."]
+    error += [f"Output file '{out_multi}' can not be opened."]

# print error/log messages
diff --git a/workflow/scripts/merge_counts.py b/workflow/scripts/merge_counts.py
index 2d9dc89..297fa0c 100644
--- a/workflow/scripts/merge_counts.py
+++ b/workflow/scripts/merge_counts.py
@@ -45,23 +45,18 @@
    ]
    biotypes = pd.read_table(gtf, names=gtf_cols, comment="#")

-    bio_type = (
-        pd.DataFrame(biotypes.attributes.apply(lambda x: x.split(";")).to_list())
-        .loc[:, 2]
-        .apply(lambda x: x.lstrip().split(" ")[1].replace('"', ""))
-    )
-
-    locus_tag = (
-        pd.DataFrame(biotypes.attributes.apply(lambda x: x.split(";")).to_list())
-        .loc[:, 3]
-        .apply(lambda x: x.lstrip().split(" ")[1].replace('"', ""))
-    )
-
-    name = (
-        pd.DataFrame(biotypes.attributes.apply(lambda x: x.split(";")).to_list())
-        .loc[:, 4]
-        .apply(lambda x: x.lstrip().split(" ")[1].replace('"', ""))
-    )
+    def parse_attrs(s):
+        pairs = [p.strip() for p in s.split(';') if p.strip()]
+        d = {}
+        for p in pairs:
+            k, v = p.split(' ', 1)
+            d[k] = v.strip().strip('"')
+        return d
+
+    attrs = biotypes.attributes.apply(parse_attrs)
+    bio_type = attrs.map(lambda d: d.get("gene_biotype") or d.get("biotype"))
+    locus_tag = attrs.map(lambda d: d.get("locus_tag"))
+    name = attrs.map(lambda d: d.get("Name") or d.get("gene_name"))

    meta_info = pd.concat([locus_tag, name, bio_type], axis=1, sort=False)
    meta_info.columns = ["locus_tag", "gene_name", "biotype"]