From 2ad495f1cb19324660d4569380cffe17ab2ffc62 Mon Sep 17 00:00:00 2001
From: angelphanth <angel.phanth@hotmail.com>
Date: Thu, 12 Mar 2026 16:50:26 +0000
Subject: [PATCH 01/14] copying integration of pfam and funfam

---
 conf/modules.config                           | 22 ++++++++++++
 conf/test.config                              |  1 +
 main.nf                                       |  3 ++
 nextflow.config                               |  3 ++
 nextflow_schema.json                          | 17 +++++++++
 subworkflows/local/domain_annotation/main.nf  | 36 +++++++++++++++++++
 subworkflows/local/domain_annotation/meta.yml | 16 +++++++++
 .../main.nf                                   |  4 +--
 workflows/proteinannotator.nf                 |  8 ++++-
 9 files changed, 107 insertions(+), 3 deletions(-)

diff --git a/conf/modules.config b/conf/modules.config
index ec1428c..9f40a6d 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -90,6 +90,17 @@ process {
         ]
     }
 
+    withName: 'NFCORE_PROTEINANNOTATOR:PROTEINANNOTATOR:DOMAIN_ANNOTATION:WGET_MROOT' {
+        ext.prefix = "HMM"
+        ext.suffix = "tar.gz"
+        ext.args   = '--no-check-certificate' // skips TLS certificate verification for the download host
+        publishDir = [
+            path: { "${params.outdir}/downloaded_dbs/" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+        ]
+    }
+
     withName: 'NFCORE_PROTEINANNOTATOR:PROTEINANNOTATOR:DOMAIN_ANNOTATION:HMMSEARCH_PFAM' {
         ext.args   = { "-E ${params.hmmsearch_evalue_cutoff}" }
         publishDir = [
@@ -110,6 +121,17 @@ process {
         ]
     }
 
+    withName: 'NFCORE_PROTEINANNOTATOR:PROTEINANNOTATOR:DOMAIN_ANNOTATION:HMMSEARCH_MROOT' {
+        ext.args   = { "-E ${params.hmmsearch_evalue_cutoff}" }
+        publishDir = [
+            path: { "${params.outdir}/domain_annotation/mroot/" },
+            mode: params.publish_dir_mode,
+            pattern: "*.domtbl.gz",
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+        ]
+    }
+
+
     withName: 'NFCORE_PROTEINANNOTATOR:PROTEINANNOTATOR:FUNCTIONAL_ANNOTATION:ARIA2' {
         publishDir = [
             path: { "${params.outdir}/downloaded_dbs/" },
diff --git a/conf/test.config b/conf/test.config
index 252ec87..02c92be 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -27,6 +27,7 @@ params {
     // Domain annotation
     pfam_latest_link   = params.pipelines_testdata_base_path + 'proteinannotator/testdata/pfam/Pfam-A_test.hmm.gz'
     funfam_latest_link = params.pipelines_testdata_base_path + 'proteinannotator/testdata/funfam/funfam-hmm3-v4_3_0_test.lib.gz'
+    mroot_latest_link  =  'https://pavlopoulos-lab.org/metagroot/DownloadHmm'
     // Functional annotation
     interproscan_db_url       = params.pipelines_testdata_base_path + 'proteinannotator/testdata/interproscan/interproscan_test.tar.gz'
     interproscan_applications = 'Hamap,TIGRFAM,sfld'
diff --git a/main.nf b/main.nf
index 98d7d67..d7f1972 100644
--- a/main.nf
+++ b/main.nf
@@ -46,6 +46,9 @@ workflow NFCORE_PROTEINANNOTATOR {
         params.skip_funfam,
         params.funfam_db,
         params.funfam_latest_link,
+        params.skip_mroot,
+        params.mroot_db,
+        params.mroot_latest_link,
         params.skip_interproscan,
         params.interproscan_db_url,
         params.interproscan_db,
diff --git a/nextflow.config b/nextflow.config
index e56f91f..f83d29d 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -25,6 +25,9 @@ params {
     skip_funfam             = false
     funfam_db               = null
     funfam_latest_link      = "https://download.cathdb.info/cath/releases/all-releases/v4_3_0/sequence-data/funfam-hmm3-v4_3_0.lib.gz"
+    skip_mroot              = false
+    mroot_db                = null
+    mroot_latest_link       = "https://pavlopoulos-lab.org/metagroot/DownloadHmm"
     hmmsearch_evalue_cutoff = 0.001
 
     // Functional annotation
diff --git a/nextflow_schema.json b/nextflow_schema.json
index b7ad6d8..754cd6c 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -276,6 +276,23 @@
                     "default": "https://download.cathdb.info/cath/releases/all-releases/v4_3_0/sequence-data/funfam-hmm3-v4_3_0.lib.gz",
                     "description": "CATH hosted link to the latest available (v4_3_0) FunFam HMM database file."
                 },
+                "skip_mroot": {
+                    "type": "boolean",
+                    "fa_icon": "fas fa-ban",
+                    "description": "Skip the domain annotation with the MetagRoot database.",
+                    "help": "Skips the domain annotation of input sequence against a MetagRoot database."
+                },
+                "mroot_db": {
+                    "type": "string",
+                    "format": "file-path",
+                    "description": "Path to an already installed MetagRoot HMM database (.tar.gz).",
+                    "help_text": "If left null and skip_mroot is false, the pipeline will start downloading the latest MetagRoot HMM library."
+                },
+                "mroot_latest_link": {
+                    "type": "string",
+                    "default": "https://pavlopoulos-lab.org/metagroot/DownloadHmm",
+                    "description": "MetagRoot hosted link to the latest available MetagRoot HMM database file."
+                },
                 "hmmsearch_evalue_cutoff": {
                     "type": "number",
                     "default": 0.001,
diff --git a/subworkflows/local/domain_annotation/main.nf b/subworkflows/local/domain_annotation/main.nf
index 1ec8289..4e76a56 100644
--- a/subworkflows/local/domain_annotation/main.nf
+++ b/subworkflows/local/domain_annotation/main.nf
@@ -1,7 +1,10 @@
 include { ARIA2 as ARIA2_PFAM                 } from '../../../modules/nf-core/aria2/main'
 include { ARIA2 as ARIA2_FUNFAM               } from '../../../modules/nf-core/aria2/main'
+include { WGET as WGET_MROOT                  } from '../../../modules/nf-core/wget/main'
 include { HMMER_HMMSEARCH as HMMSEARCH_PFAM   } from '../../../modules/nf-core/hmmer/hmmsearch/main'
 include { HMMER_HMMSEARCH as HMMSEARCH_FUNFAM } from '../../../modules/nf-core/hmmer/hmmsearch/main'
+include { HMMER_HMMSEARCH as HMMSEARCH_MROOT  } from '../../../modules/nf-core/hmmer/hmmsearch/main'
+include { UNTAR as UNTAR_MROOT                } from '../../../modules/nf-core/untar/main'
 
 workflow DOMAIN_ANNOTATION {
     take:
@@ -12,12 +15,16 @@ workflow DOMAIN_ANNOTATION {
     skip_funfam         // boolean
     funfam_db           // string, path to the funfam HMM database, if already exists
     funfam_latest_link  // string, path to the latest funfam HMM database, to download
+    skip_mroot          // boolean
+    mroot_db            // string, path to the metagroot HMM database, if already exists
+    mroot_latest_link   // string, path to the latest metagroot HMM database, to download
 
     main:
 
     ch_versions       = channel.empty()
     ch_pfam_domains   = channel.empty()
     ch_funfam_domains = channel.empty()
+    ch_mroot_domains  = channel.empty()
 
     if (!skip_pfam) {
         if (!pfam_db) {
@@ -59,8 +66,37 @@ workflow DOMAIN_ANNOTATION {
         ch_funfam_domains = HMMSEARCH_FUNFAM.out.domain_summary
     }
 
+    if (!skip_mroot) {
+        if (!mroot_db) {
+            ch_mroot_link = channel.of([ [ id: 'mroot' ], mroot_latest_link ])
+            // download file from url
+            WGET_MROOT( ch_mroot_link )
+            // unpack the downloaded tar.gz archive
+            UNTAR_MROOT( WGET_MROOT.out.outfile )
+            // extract hmm files from dir
+            ch_mroot_db = UNTAR_MROOT.out.untar
+            .map {
+                meta, dir ->
+                // collect all .hmm files from dir
+                def hmm_files = file("${dir}/**/*.hmm")
+                tuple(meta, hmm_files)
+            }
+        } else {
+            ch_mroot_db = channel.of([ [ id: 'mroot' ], mroot_db ])
+        }
+
+        ch_input_for_hmmsearch_mroot = ch_fasta
+            .combine(ch_mroot_db)
+            .map{ meta, seqs, _meta2, models -> [meta, models, seqs, false, false, true] }
+
+        HMMSEARCH_MROOT( ch_input_for_hmmsearch_mroot )
+        ch_versions = ch_versions.mix( HMMSEARCH_MROOT.out.versions.first() )
+        ch_mroot_domains = HMMSEARCH_MROOT.out.domain_summary
+    }
+
     emit:
     pfam_domains   = ch_pfam_domains
     funfam_domains = ch_funfam_domains
+    mroot_domains  = ch_mroot_domains
     versions       = ch_versions
 }
diff --git a/subworkflows/local/domain_annotation/meta.yml b/subworkflows/local/domain_annotation/meta.yml
index e04e241..ad37de9 100644
--- a/subworkflows/local/domain_annotation/meta.yml
+++ b/subworkflows/local/domain_annotation/meta.yml
@@ -42,6 +42,18 @@ input:
       type: string
       description: |
         Path to the latest FunFam HMM database, to download
+  - skip_mroot:
+      type: boolean
+      description: |
+        Skip domain annotation with MetagRoot
+  - mroot_db:
+      type: string
+      description: |
+        Path to an existing MetagRoot HMM library on the system. If provided, the MetagRoot database download will be skipped.
+  - mroot_latest_link:
+      type: string
+      description: |
+        Path to the latest MetagRoot HMM database, to download
 output:
   - pfam_domains:
       type: file
@@ -51,6 +63,10 @@ output:
       type: file
       description: |
         domtbl.gz files with funfam domain annotation for input amino acid sequences
+  - mroot_domains:
+      type: file
+      description: |
+        domtbl.gz files with metagroot domain annotation for input amino acid sequences
   - versions:
       type: file
       description: |
diff --git a/subworkflows/local/utils_nfcore_proteinannotator_pipeline/main.nf b/subworkflows/local/utils_nfcore_proteinannotator_pipeline/main.nf
index 1ba3ccc..7ef2d1a 100644
--- a/subworkflows/local/utils_nfcore_proteinannotator_pipeline/main.nf
+++ b/subworkflows/local/utils_nfcore_proteinannotator_pipeline/main.nf
@@ -180,7 +180,7 @@ def toolCitationText() {
         params.skip_preprocessing ? "" : "Input sequences were preprocessed with SeqKit (gap trimming, length filtering, validation, duplicate removal) (Shen et al. 2024)."
     ].join(' ').trim()
 
-    def domain_annotation_text = (params.skip_pfam && params.skip_funfam) ? "" : "Domains were annotated with hmmer/hmmsearch (Eddy et al. 2011)."
+    def domain_annotation_text = (params.skip_pfam && params.skip_funfam && params.skip_mroot) ? "" : "Domains were annotated with hmmer/hmmsearch (Eddy et al. 2011)."
 
     def prediction_text = params.skip_s4pred ? "" : "Secondary structures were predicted via the s4pred software (Moffat et al. 2021)."
 
@@ -202,7 +202,7 @@ def toolBibliographyText() {
         params.skip_preprocessing ? '' : '<li>Shen, W., Sipos, B., & Zhao, L. (2024). SeqKit2: A Swiss army knife for sequence and alignment processing. Imeta, 3(3), e191. doi: <a href="https://doi.org/10.1002/imt2.191">10.1002/imt2.191</a></li>'
     ].join(' ').trim()
 
-    def domain_annotation_text = (params.skip_pfam && params.skip_funfam) ? '' : '<li>Eddy, S. R. (2011). Accelerated profile HMM searches. PLoS computational biology, 7(10), e1002195. doi: <a href="https://doi.org/10.1371/journal.pcbi.1002195">10.1371/journal.pcbi.1002195</a></li>'
+    def domain_annotation_text = (params.skip_pfam && params.skip_funfam && params.skip_mroot) ? '' : '<li>Eddy, S. R. (2011). Accelerated profile HMM searches. PLoS computational biology, 7(10), e1002195. doi: <a href="https://doi.org/10.1371/journal.pcbi.1002195">10.1371/journal.pcbi.1002195</a></li>'
 
     def prediction_text = params.skip_s4pred ? '' : '<li>Moffat, L., & Jones, D. T. (2021). Increasing the accuracy of single sequence prediction methods using a deep semi-supervised learning framework. Bioinformatics, 37(21), 3744-3751. doi: <a href="https://doi.org/10.1093/bioinformatics/btab491">10.1093/bioinformatics/btab491</a></li>'
 
diff --git a/workflows/proteinannotator.nf b/workflows/proteinannotator.nf
index fae1d7a..4398d69 100644
--- a/workflows/proteinannotator.nf
+++ b/workflows/proteinannotator.nf
@@ -29,6 +29,9 @@ workflow PROTEINANNOTATOR {
     skip_funfam         // boolean
     funfam_db           // string, path to the pfam HMM database, if already exists
     funfam_latest_link  // string, path to the latest pfam HMM database, to download
+    skip_mroot          // boolean
+    mroot_db            // string, path to the metagroot HMM database, if already exists
+    mroot_latest_link   // string, path to the latest metagroot HMM database, to download
     skip_interproscan   // boolean
     interproscan_db_url // string, url to download db
     interproscan_db     // string, existing db
@@ -49,7 +52,10 @@ workflow PROTEINANNOTATOR {
         pfam_latest_link,
         skip_funfam,
         funfam_db,
-        funfam_latest_link
+        funfam_latest_link,
+        skip_mroot,
+        mroot_db,
+        mroot_latest_link
     )
     ch_versions = ch_versions.mix( DOMAIN_ANNOTATION.out.versions )
 

From 63fd565dcbe9a311932fe8feff14a960bdaadc38 Mon Sep 17 00:00:00 2001
From: angelphanth <angel.phanth@hotmail.com>
Date: Thu, 12 Mar 2026 16:53:15 +0000
Subject: [PATCH 02/14] nf-core modules wget and untar as a workaround for
 aria2 to get metagroot tar file?

---
 modules.json                                 |  5 ++
 modules/nf-core/wget/environment.yml         |  7 ++
 modules/nf-core/wget/main.nf                 | 48 ++++++++++++++
 modules/nf-core/wget/meta.yml                | 52 +++++++++++++++
 modules/nf-core/wget/tests/main.nf.test      | 62 +++++++++++++++++
 modules/nf-core/wget/tests/main.nf.test.snap | 70 ++++++++++++++++++++
 modules/nf-core/wget/tests/nextflow.config   |  6 ++
 7 files changed, 250 insertions(+)
 create mode 100644 modules/nf-core/wget/environment.yml
 create mode 100644 modules/nf-core/wget/main.nf
 create mode 100644 modules/nf-core/wget/meta.yml
 create mode 100644 modules/nf-core/wget/tests/main.nf.test
 create mode 100644 modules/nf-core/wget/tests/main.nf.test.snap
 create mode 100644 modules/nf-core/wget/tests/nextflow.config

diff --git a/modules.json b/modules.json
index 37ba5b8..6a31b58 100644
--- a/modules.json
+++ b/modules.json
@@ -59,6 +59,11 @@
                         "branch": "master",
                         "git_sha": "447f7bc0fa41dfc2400c8cad4c0291880dc060cf",
                         "installed_by": ["modules"]
+                    },
+                    "wget": {
+                        "branch": "master",
+                        "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46",
+                        "installed_by": ["modules"]
                     }
                 }
             },
diff --git a/modules/nf-core/wget/environment.yml b/modules/nf-core/wget/environment.yml
new file mode 100644
index 0000000..9eb304e
--- /dev/null
+++ b/modules/nf-core/wget/environment.yml
@@ -0,0 +1,7 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - conda-forge::wget=1.21.4
diff --git a/modules/nf-core/wget/main.nf b/modules/nf-core/wget/main.nf
new file mode 100644
index 0000000..9bc6f15
--- /dev/null
+++ b/modules/nf-core/wget/main.nf
@@ -0,0 +1,48 @@
+process WGET {
+    tag "$meta.id"
+    label 'process_single'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/3b/3b54fa9135194c72a18d00db6b399c03248103f87e43ca75e4b50d61179994b3/data':
+        'community.wave.seqera.io/library/wget:1.21.4--8b0fcde81c17be5e' }"
+
+    input:
+    tuple val(meta), val(url)
+
+    output:
+    tuple val(meta), path("${prefix}.${suffix}"), emit: outfile
+    path "versions.yml"                         , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args   ?: ''
+    prefix   = task.ext.prefix ?: "${meta.id}"
+    suffix   = task.ext.suffix ?: 'html'
+    """
+    wget \\
+        -O - \\
+        $args \\
+        $url \\
+        > ${prefix}.${suffix}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        wget: \$(wget --version | head -1 | cut -d ' ' -f 3)
+    END_VERSIONS
+    """
+
+    stub:
+    prefix   = task.ext.prefix ?: "${meta.id}"
+    suffix   = task.ext.suffix ?: 'html'
+    """
+    touch ${prefix}.${suffix}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        wget: \$(wget --version | head -1 | cut -d ' ' -f 3)
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/wget/meta.yml b/modules/nf-core/wget/meta.yml
new file mode 100644
index 0000000..56df0af
--- /dev/null
+++ b/modules/nf-core/wget/meta.yml
@@ -0,0 +1,52 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "wget"
+description: The non-interactive network downloader
+keywords:
+  - "wget"
+  - "download"
+  - "network"
+tools:
+  - "wget":
+      description: "wget is a free utility for non-interactive download of files from
+        the Web."
+      homepage: "https://www.gnu.org/software/wget/"
+      documentation: "https://www.gnu.org/software/wget/manual/wget.html"
+      licence: ["GPL"]
+      identifier: ""
+
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. `[ id:'sample1' ]`
+    - url:
+        type: string
+        description: URL to download
+        pattern: "^https?://*.*"
+
+output:
+  outfile:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1' ]`
+      - ${prefix}.${suffix}:
+          type: file
+          description: Downloaded file
+          pattern: "*.*"
+
+          ontologies: []
+  versions:
+    - versions.yml:
+        type: file
+        description: File containing software versions
+        pattern: "versions.yml"
+
+        ontologies:
+          - edam: http://edamontology.org/format_3750 # YAML
+authors:
+  - "@itrujnara"
+maintainers:
+  - "@itrujnara"
diff --git a/modules/nf-core/wget/tests/main.nf.test b/modules/nf-core/wget/tests/main.nf.test
new file mode 100644
index 0000000..e094288
--- /dev/null
+++ b/modules/nf-core/wget/tests/main.nf.test
@@ -0,0 +1,62 @@
+// nf-core modules test wget
+nextflow_process {
+
+    name "Test Process WGET"
+    script "../main.nf"
+    process "WGET"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "wget"
+
+    test("sarscov2 - gff") {
+
+        config "./nextflow.config"
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test' ], // meta map
+                    "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/modules/data/genomics/sarscov2/genome/genome.gff3",
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+
+    test("sarscov2 - gff - stub") {
+
+        options "-stub"
+
+        config "./nextflow.config"
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:false ], // meta map
+                    "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/modules/data/genomics/sarscov2/genome/genome.gff3",
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+
+}
diff --git a/modules/nf-core/wget/tests/main.nf.test.snap b/modules/nf-core/wget/tests/main.nf.test.snap
new file mode 100644
index 0000000..6c05160
--- /dev/null
+++ b/modules/nf-core/wget/tests/main.nf.test.snap
@@ -0,0 +1,70 @@
+{
+    "sarscov2 - gff": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.gff3:md5,357435a81a9981a0128e840ebe11051e"
+                    ]
+                ],
+                "1": [
+                    "versions.yml:md5,a747f72db5fc051f64676a0ba6f32f35"
+                ],
+                "outfile": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.gff3:md5,357435a81a9981a0128e840ebe11051e"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,a747f72db5fc051f64676a0ba6f32f35"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.10.4"
+        },
+        "timestamp": "2025-03-26T12:27:32.67617"
+    },
+    "sarscov2 - gff - stub": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.gff3:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "1": [
+                    "versions.yml:md5,a747f72db5fc051f64676a0ba6f32f35"
+                ],
+                "outfile": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.gff3:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,a747f72db5fc051f64676a0ba6f32f35"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.10.4"
+        },
+        "timestamp": "2025-03-26T12:21:06.414955"
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/wget/tests/nextflow.config b/modules/nf-core/wget/tests/nextflow.config
new file mode 100644
index 0000000..236f4e1
--- /dev/null
+++ b/modules/nf-core/wget/tests/nextflow.config
@@ -0,0 +1,6 @@
+process {
+    withName: "WGET" {
+        ext.prefix = "test"
+        ext.suffix = "gff3"
+    }
+}

From 694306403b6b501ddeeadb76a8e93956034fb10f Mon Sep 17 00:00:00 2001
From: angelphanth <angel.phanth@hotmail.com>
Date: Mon, 30 Mar 2026 16:26:15 +0100
Subject: [PATCH 03/14] fix ARIA2_MROOT and take: alignment

---
 subworkflows/local/domain_annotation/main.nf | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/subworkflows/local/domain_annotation/main.nf b/subworkflows/local/domain_annotation/main.nf
index 1d0054a..afb1569 100644
--- a/subworkflows/local/domain_annotation/main.nf
+++ b/subworkflows/local/domain_annotation/main.nf
@@ -1,7 +1,7 @@
 include { ARIA2 as ARIA2_PFAM                  } from '../../../modules/nf-core/aria2/main'
 include { ARIA2 as ARIA2_FUNFAM                } from '../../../modules/nf-core/aria2/main'
 include { ARIA2 as ARIA2_NMPFAMS               } from '../../../modules/nf-core/aria2/main'
-include { ARIA2 as ARIA2_NMPFAMS               } from '../../../modules/nf-core/aria2/main'
+include { ARIA2 as ARIA2_MROOT                 } from '../../../modules/nf-core/aria2/main'
 include { HMMER_HMMSEARCH as HMMSEARCH_PFAM    } from '../../../modules/nf-core/hmmer/hmmsearch/main'
 include { HMMER_HMMSEARCH as HMMSEARCH_FUNFAM  } from '../../../modules/nf-core/hmmer/hmmsearch/main'
 include { HMMER_HMMSEARCH as HMMSEARCH_NMPFAMS } from '../../../modules/nf-core/hmmer/hmmsearch/main'
@@ -19,10 +19,10 @@ workflow DOMAIN_ANNOTATION {
     skip_nmpfams           // boolean
     nmpfams_db             // string
     nmpfams_latest_link    // string
-    skip_mroot          // boolean
-    mroot_db            // string, path to the metagroot HMM database, if already exists
-    mroot_latest_link   // string, path to the latest metagroot HMM database, to download
-    
+    skip_mroot             // boolean
+    mroot_db               // string, path to the metagroot HMM database, if already exists
+    mroot_latest_link      // string, path to the latest metagroot HMM database, to download
+
     main:
 
     ch_versions        = channel.empty()
@@ -90,7 +90,7 @@ workflow DOMAIN_ANNOTATION {
         ch_versions = ch_versions.mix( HMMSEARCH_NMPFAMS.out.versions.first() )
         ch_nmpfams_domains = HMMSEARCH_NMPFAMS.out.domain_summary
     }
-    
+
     if (!skip_mroot) {
         if (!mroot_db) {
             ch_mroot_link = channel.of([ [ id: 'mroot' ], mroot_latest_link ])

From 1cf0555036da472214fb443c303ce42d382c1ef9 Mon Sep 17 00:00:00 2001
From: angelphanth <angel.phanth@hotmail.com>
Date: Mon, 30 Mar 2026 17:15:27 +0100
Subject: [PATCH 04/14] replace 'mroot' with 'metagroot'

---
 conf/modules.config                           |  8 +--
 conf/test.config                              |  2 +-
 conf/test_full.config                         |  1 +
 main.nf                                       |  4 +-
 nextflow.config                               |  6 +--
 nextflow_schema.json                          | 10 ++--
 subworkflows/local/domain_annotation/main.nf  | 38 +++++++-------
 subworkflows/local/domain_annotation/meta.yml |  8 +--
 .../domain_annotation/tests/main.nf.test      | 51 ++++++++++++++++++-
 .../main.nf                                   |  4 +-
 workflows/proteinannotator.nf                 | 42 +++++++--------
 11 files changed, 112 insertions(+), 62 deletions(-)

diff --git a/conf/modules.config b/conf/modules.config
index 5640258..f382849 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -89,7 +89,7 @@ process {
             saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
         ]
     }
-        
+
     withName: 'NFCORE_PROTEINANNOTATOR:PROTEINANNOTATOR:DOMAIN_ANNOTATION:ARIA2_NMPFAMS' {
         publishDir = [
             path: { "${params.outdir}/downloaded_dbs/" },
@@ -127,11 +127,11 @@ process {
             saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
         ]
     }
-    
-    withName: 'NFCORE_PROTEINANNOTATOR:PROTEINANNOTATOR:DOMAIN_ANNOTATION:HMMSEARCH_MROOT' {
+
+    withName: 'NFCORE_PROTEINANNOTATOR:PROTEINANNOTATOR:DOMAIN_ANNOTATION:HMMSEARCH_METAGROOT' {
         ext.args   = { "-E ${params.hmmsearch_evalue_cutoff}" }
         publishDir = [
-            path: { "${params.outdir}/domain_annotation/mroot/" },
+            path: { "${params.outdir}/domain_annotation/metagroot/" },
             mode: params.publish_dir_mode,
             pattern: "*.domtbl.gz",
             saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
diff --git a/conf/test.config b/conf/test.config
index 5a80c47..e392923 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -28,7 +28,7 @@ params {
     pfam_latest_link    = params.pipelines_testdata_base_path + 'proteinannotator/testdata/pfam/Pfam-A_test.hmm.gz'
     funfam_latest_link  = params.pipelines_testdata_base_path + 'proteinannotator/testdata/funfam/funfam-hmm3-v4_3_0_test.lib.gz'
     nmpfams_latest_link = params.pipelines_testdata_base_path + 'proteinannotator/testdata/nmpfams/nmpfamsdb_test.hmm.gz'
-    mroot_latest_link   = params.pipelines_testdata_base_path + 'proteinannotator/testdata/metagroot/metagrootdb_test.hmm.gz'
+    metagroot_latest_link   = params.pipelines_testdata_base_path + 'proteinannotator/testdata/metagroot/metagrootdb_test.hmm.gz'
     // Functional annotation
     interproscan_db_url       = params.pipelines_testdata_base_path + 'proteinannotator/testdata/interproscan/interproscan_test.tar.gz'
     interproscan_applications = 'Hamap,TIGRFAM,sfld'
diff --git a/conf/test_full.config b/conf/test_full.config
index bcf1d96..4b63915 100644
--- a/conf/test_full.config
+++ b/conf/test_full.config
@@ -20,6 +20,7 @@ params {
     pfam_latest_link    = params.pipelines_testdata_base_path + 'proteinannotator/testdata/pfam/Pfam-A_test.hmm.gz'
     funfam_latest_link  = params.pipelines_testdata_base_path + 'proteinannotator/testdata/funfam/funfam-hmm3-v4_3_0_test.lib.gz'
     nmpfams_latest_link = params.pipelines_testdata_base_path + 'proteinannotator/testdata/nmpfams/nmpfamsdb_test.hmm.gz'
+    metagroot_latest_link   = params.pipelines_testdata_base_path + 'proteinannotator/testdata/metagroot/metagrootdb_test.hmm.gz'
     // Functional annotation
     interproscan_db_url       = params.pipelines_testdata_base_path + 'proteinannotator/testdata/interproscan_test.tar.gz'
     interproscan_applications = 'Hamap,TIGRFAM,sfld'
diff --git a/main.nf b/main.nf
index a4e9221..9f7f19b 100644
--- a/main.nf
+++ b/main.nf
@@ -49,8 +49,8 @@ workflow NFCORE_PROTEINANNOTATOR {
         params.skip_nmpfams,
         params.nmpfams_db,
         params.nmpfams_latest_link,
-        params.mroot_db,
-        params.mroot_latest_link,
+        params.metagroot_db,
+        params.metagroot_latest_link,
         params.skip_interproscan,
         params.interproscan_db_url,
         params.interproscan_db,
diff --git a/nextflow.config b/nextflow.config
index 99e06a5..af16afb 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -28,9 +28,9 @@ params {
     skip_nmpfams            = false
     nmpfams_db              = null
     nmpfams_latest_link     = "https://pavlopoulos-lab.org/envofams/databases/hmmer/nmpfamsdb.hmm.gz"
-    skip_mroot              = false
-    mroot_db                = null
-    mroot_latest_link       = "https://pavlopoulos-lab.org/envofams/databases/hmmer/metagroot.hmm.gz"
+    skip_metagroot              = false
+    metagroot_db                = null
+    metagroot_latest_link       = "https://pavlopoulos-lab.org/envofams/databases/hmmer/metagroot.hmm.gz"
     hmmsearch_evalue_cutoff = 0.001
 
     // Functional annotation
diff --git a/nextflow_schema.json b/nextflow_schema.json
index e3e9c53..ffd915d 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -293,22 +293,23 @@
                     "default": "https://pavlopoulos-lab.org/envofams/databases/hmmer/nmpfamsdb.hmm.gz",
                     "description": ""
                 },
-                "skip_mroot": {
+                "skip_metagroot": {
                     "type": "boolean",
                     "fa_icon": "fas fa-ban",
                     "description": "Skip the domain annotation with the MetagRoot database.",
                     "help": "Skips the domain annotation of input sequence against a MetagRoot database."
                 },
-                "mroot_db": {
+                "metagroot_db": {
                     "type": "string",
                     "format": "file-path",
                     "description": "Path to an already installed MetagRoot HMM database (.hmm.gz).",
-                    "help_text": "If left null and skip_mroot is false, the pipeline will start downloading the latest MetagRoot HMM library."
+                    "help_text": "If left null and skip_metagroot is false, the pipeline will start downloading the latest MetagRoot HMM library."
                 },
-                "mroot_latest_link": {
+                "metagroot_latest_link": {
                     "type": "string",
                     "default": "https://pavlopoulos-lab.org/envofams/databases/hmmer/metagroot.hmm.gz",
                     "description": "MetagRoot hosted link to the latest available MetagRoot HMM database file."
+                },
                 "hmmsearch_evalue_cutoff": {
                     "type": "number",
                     "default": 0.001,
@@ -395,7 +396,6 @@
         {
             "$ref": "#/$defs/domain_annotation_params"
         },
-
         {
             "$ref": "#/$defs/functional_annotation_options"
         },
diff --git a/subworkflows/local/domain_annotation/main.nf b/subworkflows/local/domain_annotation/main.nf
index afb1569..76692f7 100644
--- a/subworkflows/local/domain_annotation/main.nf
+++ b/subworkflows/local/domain_annotation/main.nf
@@ -1,11 +1,11 @@
 include { ARIA2 as ARIA2_PFAM                  } from '../../../modules/nf-core/aria2/main'
 include { ARIA2 as ARIA2_FUNFAM                } from '../../../modules/nf-core/aria2/main'
 include { ARIA2 as ARIA2_NMPFAMS               } from '../../../modules/nf-core/aria2/main'
-include { ARIA2 as ARIA2_MROOT                 } from '../../../modules/nf-core/aria2/main'
+include { ARIA2 as ARIA2_METAGROOT                 } from '../../../modules/nf-core/aria2/main'
 include { HMMER_HMMSEARCH as HMMSEARCH_PFAM    } from '../../../modules/nf-core/hmmer/hmmsearch/main'
 include { HMMER_HMMSEARCH as HMMSEARCH_FUNFAM  } from '../../../modules/nf-core/hmmer/hmmsearch/main'
 include { HMMER_HMMSEARCH as HMMSEARCH_NMPFAMS } from '../../../modules/nf-core/hmmer/hmmsearch/main'
-include { HMMER_HMMSEARCH as HMMSEARCH_MROOT   } from '../../../modules/nf-core/hmmer/hmmsearch/main'
+include { HMMER_HMMSEARCH as HMMSEARCH_METAGROOT   } from '../../../modules/nf-core/hmmer/hmmsearch/main'
 
 workflow DOMAIN_ANNOTATION {
     take:
@@ -19,9 +19,9 @@ workflow DOMAIN_ANNOTATION {
     skip_nmpfams           // boolean
     nmpfams_db             // string
     nmpfams_latest_link    // string
-    skip_mroot             // boolean
-    mroot_db               // string, path to the metagroot HMM database, if already exists
-    mroot_latest_link      // string, path to the latest metagroot HMM database, to download
+    skip_metagroot             // boolean
+    metagroot_db               // string, path to the metagroot HMM database, if already exists
+    metagroot_latest_link      // string, path to the latest metagroot HMM database, to download
 
     main:
 
@@ -29,7 +29,7 @@ workflow DOMAIN_ANNOTATION {
     ch_pfam_domains    = channel.empty()
     ch_funfam_domains  = channel.empty()
     ch_nmpfams_domains = channel.empty()
-    ch_mroot_domains   = channel.empty()
+    ch_metagroot_domains   = channel.empty()
 
     if (!skip_pfam) {
         if (!pfam_db) {
@@ -91,30 +91,30 @@ workflow DOMAIN_ANNOTATION {
         ch_nmpfams_domains = HMMSEARCH_NMPFAMS.out.domain_summary
     }
 
-    if (!skip_mroot) {
-        if (!mroot_db) {
-            ch_mroot_link = channel.of([ [ id: 'mroot' ], mroot_latest_link ])
+    if (!skip_metagroot) {
+        if (!metagroot_db) {
+            ch_metagroot_link = channel.of([ [ id: 'metagroot' ], metagroot_latest_link ])
 
-            ARIA2_MROOT( ch_mroot_link )
-            ch_versions = ch_versions.mix( ARIA2_MROOT.out.versions )
-            ch_mroot_db = ARIA2_MROOT.out.downloaded_file
+            ARIA2_METAGROOT( ch_metagroot_link )
+            ch_versions = ch_versions.mix( ARIA2_METAGROOT.out.versions )
+            ch_metagroot_db = ARIA2_METAGROOT.out.downloaded_file
         } else {
-            ch_mroot_db = channel.of([ [ id: 'mroot' ], mroot_db ])
+            ch_metagroot_db = channel.of([ [ id: 'metagroot' ], metagroot_db ])
         }
 
-        ch_input_for_hmmsearch_mroot = ch_fasta
-            .combine(ch_mroot_db)
+        ch_input_for_hmmsearch_metagroot = ch_fasta
+            .combine(ch_metagroot_db)
             .map{ meta, seqs, _meta2, models -> [meta, models, seqs, false, false, true] }
 
-        HMMSEARCH_MROOT( ch_input_for_hmmsearch_mroot )
-        ch_versions = ch_versions.mix( HMMSEARCH_MROOT.out.versions.first() )
-        ch_mroot_domains = HMMSEARCH_MROOT.out.domain_summary
+        HMMSEARCH_METAGROOT( ch_input_for_hmmsearch_metagroot )
+        ch_versions = ch_versions.mix( HMMSEARCH_METAGROOT.out.versions.first() )
+        ch_metagroot_domains = HMMSEARCH_METAGROOT.out.domain_summary
     }
 
     emit:
     pfam_domains    = ch_pfam_domains
     funfam_domains  = ch_funfam_domains
     nmpfams_domains = ch_nmpfams_domains
-    mroot_domains   = ch_mroot_domains
+    metagroot_domains   = ch_metagroot_domains
     versions        = ch_versions
 }
diff --git a/subworkflows/local/domain_annotation/meta.yml b/subworkflows/local/domain_annotation/meta.yml
index f408ea1..b60228b 100644
--- a/subworkflows/local/domain_annotation/meta.yml
+++ b/subworkflows/local/domain_annotation/meta.yml
@@ -54,15 +54,15 @@ input:
       type: string
       description: |
         Path to the latest nmpfamsDB HMM database, to download
-  - skip_mroot:
+  - skip_metagroot:
       type: boolean
       description: |
         Skip domain annotation with MetagRoot
-  - mroot_db:
+  - metagroot_db:
       type: string
       description: |
         Path to an existing HMM MetagRoot library on the system. If provided, the ARIA2_METAGROOT db download will be skipped.
-  - mroot_latest_link:
+  - metagroot_latest_link:
       type: string
       description: |
         Path to the latest MetagRoot HMM database, to download
@@ -79,7 +79,7 @@ output:
       type: file
       description: |
         domtbl.gz files with nmpfams domain annotation for input amino acid sequences
-  - mroot_domains:
+  - metagroot_domains:
       type: file
       description: |
         domtbl.gz files with metagroot domain annotation for input amino acid sequences
diff --git a/subworkflows/local/domain_annotation/tests/main.nf.test b/subworkflows/local/domain_annotation/tests/main.nf.test
index 18030f4..d96a12f 100644
--- a/subworkflows/local/domain_annotation/tests/main.nf.test
+++ b/subworkflows/local/domain_annotation/tests/main.nf.test
@@ -22,6 +22,9 @@ nextflow_workflow {
                 input[7] = true                                                                                    // skip_nmpfams
                 input[8] = null                                                                                    // nmpfams_db
                 input[9] = params.pipelines_testdata_base_path + '/testdata/nmpfams/nmpfamsdb_test.hmm.gz'         // nmpfams_latest_link
+                input[10] = true                                                                                   // skip_metagroot
+                input[11] = null                                                                                   // metagroot_db
+                input[12] = params.pipelines_testdata_base_path + '/testdata/metagroot/metagroot_test.hmm.gz'      // metagroot_latest_link
                 """
             }
         }
@@ -56,6 +59,9 @@ nextflow_workflow {
                 input[7] = true                                                                                    // skip_nmpfams
                 input[8] = null                                                                                    // nmpfams_db
                 input[9] = params.pipelines_testdata_base_path + '/testdata/nmpfams/nmpfamsdb_test.hmm.gz'         // nmpfams_latest_link
+                input[10] = true                                                                                   // skip_metagroot
+                input[11] = null                                                                                   // metagroot_db
+                input[12] = params.pipelines_testdata_base_path + '/testdata/metagroot/metagroot_test.hmm.gz'      // metagroot_latest_link
                 """
             }
         }
@@ -89,6 +95,9 @@ nextflow_workflow {
                 input[7] = false                                                                                   // skip_nmpfams
                 input[8] = null                                                                                    // nmpfams_db
                 input[9] = params.pipelines_testdata_base_path + '/testdata/nmpfams/nmpfamsdb_test.hmm.gz'         // nmpfams_latest_link
+                input[10] = true                                                                                   // skip_metagroot
+                input[11] = null                                                                                   // metagroot_db
+                input[12] = params.pipelines_testdata_base_path + '/testdata/metagroot/metagroot_test.hmm.gz'      // metagroot_latest_link
                 """
             }
         }
@@ -104,6 +113,43 @@ nextflow_workflow {
         }
     }
 
+    test("faa - metagroot") {
+
+        when {
+            workflow {
+                """
+                input[0] = channel.of([
+                    [ id: 'test' ],
+                    file(params.pipelines_testdata_base_path + '/testdata/sequences/test_proteins.faa', checkIfExists: true)
+                ])
+                input[1] = true                                                                                    // skip_pfam
+                input[2] = null                                                                                    // pfam_db
+                input[3] = params.pipelines_testdata_base_path + '/testdata/pfam/Pfam-A_test.hmm.gz'               // pfam_latest_link
+                input[4] = true                                                                                    // skip_funfam
+                input[5] = null                                                                                    // funfam_db
+                input[6] = params.pipelines_testdata_base_path + '/testdata/funfam/funfam-hmm3-v4_3_0_test.lib.gz' // funfam_latest_link
+                input[7] = true                                                                                    // skip_nmpfams
+                input[8] = null                                                                                    // nmpfams_db
+                input[9] = params.pipelines_testdata_base_path + '/testdata/nmpfams/nmpfamsdb_test.hmm.gz'         // nmpfams_latest_link
+                input[10] = false                                                                                  // skip_metagroot
+                input[11] = null                                                                                   // metagroot_db
+                input[12] = params.pipelines_testdata_base_path + '/testdata/metagroot/metagroot_test.hmm.gz'      // metagroot_latest_link
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert workflow.success},
+                { assert snapshot(
+                    // pfam, funfam and nmpfams are skipped above, so only the metagroot channel has output to snapshot
+                    path(workflow.out.metagroot_domains[0][1]).linesGzip[0..7],
+                    workflow.out.versions.collect { path(it).yaml }.unique()
+                    ).match()}
+            )
+        }
+    }
+
     test("faa - domain annotation - stub") {
 
         options "-stub"
@@ -123,7 +169,10 @@ nextflow_workflow {
                 input[6] = params.pipelines_testdata_base_path + '/testdata/funfam/funfam-hmm3-v4_3_0_test.lib.gz' // funfam_latest_link
                 input[7] = false                                                                                   // skip_nmpfams
                 input[8] = null                                                                                    // nmpfams_db
-                input[9] = params.pipelines_testdata_base_path + '/testdata/nmpfams/nmpfamsdb_test.hmm.gz'           // nmpfams_latest_link
+                input[9] = params.pipelines_testdata_base_path + '/testdata/nmpfams/nmpfamsdb_test.hmm.gz'         // nmpfams_latest_link
+                input[10] = false                                                                                  // skip_metagroot
+                input[11] = null                                                                                   // metagroot_db
+                input[12] = params.pipelines_testdata_base_path + '/testdata/metagroot/metagroot_test.hmm.gz'      // metagroot_latest_link
                 """
             }
         }
diff --git a/subworkflows/local/utils_nfcore_proteinannotator_pipeline/main.nf b/subworkflows/local/utils_nfcore_proteinannotator_pipeline/main.nf
index 8a67633..ded790e 100644
--- a/subworkflows/local/utils_nfcore_proteinannotator_pipeline/main.nf
+++ b/subworkflows/local/utils_nfcore_proteinannotator_pipeline/main.nf
@@ -180,7 +180,7 @@ def toolCitationText() {
         params.skip_preprocessing ? "" : "Input sequences were preprocessed with SeqKit (gap trimming, length filtering, validation, duplicate removal) (Shen et al. 2024)."
     ].join(' ').trim()
 
-    def domain_annotation_text = (params.skip_pfam && params.skip_funfam && params.skip_nmpfams && params.skip_mroot) ? "" : "Domains were annotated with hmmer/hmmsearch (Eddy et al. 2011)."
+    def domain_annotation_text = (params.skip_pfam && params.skip_funfam && params.skip_nmpfams && params.skip_metagroot) ? "" : "Domains were annotated with hmmer/hmmsearch (Eddy et al. 2011)."
 
     def prediction_text = params.skip_s4pred ? "" : "Secondary structures were predicted via the s4pred software (Moffat et al. 2021)."
 
@@ -202,7 +202,7 @@ def toolBibliographyText() {
         params.skip_preprocessing ? '' : '<li>Shen, W., Sipos, B., & Zhao, L. (2024). SeqKit2: A Swiss army knife for sequence and alignment processing. Imeta, 3(3), e191. doi: <a href="https://doi.org/10.1002/imt2.191">10.1002/imt2.191</a></li>'
     ].join(' ').trim()
 
-    def domain_annotation_text = (params.skip_pfam && params.skip_funfam && params.skip_nmpfams && params.skip_mroot) ? '' : '<li>Eddy, S. R. (2011). Accelerated profile HMM searches. PLoS computational biology, 7(10), e1002195. doi: <a href="https://doi.org/10.1371/journal.pcbi.1002195">10.1371/journal.pcbi.1002195</a></li>'
+    def domain_annotation_text = (params.skip_pfam && params.skip_funfam && params.skip_nmpfams && params.skip_metagroot) ? '' : '<li>Eddy, S. R. (2011). Accelerated profile HMM searches. PLoS computational biology, 7(10), e1002195. doi: <a href="https://doi.org/10.1371/journal.pcbi.1002195">10.1371/journal.pcbi.1002195</a></li>'
 
     def prediction_text = params.skip_s4pred ? '' : '<li>Moffat, L., & Jones, D. T. (2021). Increasing the accuracy of single sequence prediction methods using a deep semi-supervised learning framework. Bioinformatics, 37(21), 3744-3751. doi: <a href="https://doi.org/10.1093/bioinformatics/btab491">10.1093/bioinformatics/btab491</a></li>'
 
diff --git a/workflows/proteinannotator.nf b/workflows/proteinannotator.nf
index 55aa8fe..a0d99fa 100644
--- a/workflows/proteinannotator.nf
+++ b/workflows/proteinannotator.nf
@@ -21,24 +21,24 @@ include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_prot
 
 workflow PROTEINANNOTATOR {
     take:
-    ch_samplesheet      // channel: samplesheet read in from --input
-    skip_preprocessing  // boolean
-    skip_pfam           // boolean
-    pfam_db             // string, path to the pfam HMM database, if already exists
-    pfam_latest_link    // string, path to the latest pfam HMM database, to download
-    skip_funfam         // boolean
-    funfam_db           // string, path to the pfam HMM database, if already exists
-    funfam_latest_link  // string, path to the latest pfam HMM database, to download
-    skip_nmpfams        // boolean
-    nmpfams_db          // string
-    nmpfams_latest_link // string
-    skip_mroot          // boolean
-    mroot_db            // string, path to the metagroot HMM database, if already exists
-    mroot_latest_link   // string, path to the latest metagroot HMM database, to download
-    skip_interproscan   // boolean
-    interproscan_db_url // string, url to download db
-    interproscan_db     // string, existing db
-    skip_s4pred         // boolean
+    ch_samplesheet          // channel: samplesheet read in from --input
+    skip_preprocessing      // boolean
+    skip_pfam               // boolean
+    pfam_db                 // string, path to the pfam HMM database, if already exists
+    pfam_latest_link        // string, path to the latest pfam HMM database, to download
+    skip_funfam             // boolean
+    funfam_db               // string, path to the funfam HMM database, if already exists
+    funfam_latest_link      // string, path to the latest funfam HMM database, to download
+    skip_nmpfams            // boolean
+    nmpfams_db              // string
+    nmpfams_latest_link     // string
+    skip_metagroot          // boolean
+    metagroot_db            // string, path to the metagroot HMM database, if already exists
+    metagroot_latest_link   // string, path to the latest metagroot HMM database, to download
+    skip_interproscan       // boolean
+    interproscan_db_url     // string, url to download db
+    interproscan_db         // string, existing db
+    skip_s4pred             // boolean
 
     main:
 
@@ -59,9 +59,9 @@ workflow PROTEINANNOTATOR {
         skip_nmpfams,
         nmpfams_db,
         nmpfams_latest_link,
-        skip_mroot,
-        mroot_db,
-        mroot_latest_link
+        skip_metagroot,
+        metagroot_db,
+        metagroot_latest_link
     )
     ch_versions = ch_versions.mix( DOMAIN_ANNOTATION.out.versions )
 

From f1372263298f214d4e089ce9b066232bbe3bd388 Mon Sep 17 00:00:00 2001
From: angelphanth <angel.phanth@hotmail.com>
Date: Mon, 30 Mar 2026 17:26:42 +0100
Subject: [PATCH 05/14] align = and comments

---
 conf/modules.config                          |  8 ++++
 nextflow.config                              |  6 +--
 subworkflows/local/domain_annotation/main.nf | 40 ++++++++++----------
 3 files changed, 31 insertions(+), 23 deletions(-)

diff --git a/conf/modules.config b/conf/modules.config
index f382849..b325242 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -98,6 +98,14 @@ process {
         ]
     }
 
+    withName: 'NFCORE_PROTEINANNOTATOR:PROTEINANNOTATOR:DOMAIN_ANNOTATION:ARIA2_METAGROOT' { // publishing rules for the ARIA2_METAGROOT process
+        publishDir = [
+            path: { "${params.outdir}/downloaded_dbs/" }, // target directory: <outdir>/downloaded_dbs/
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename } // returns null for versions.yml, every other filename unchanged
+        ]
+    }
+
     withName: 'NFCORE_PROTEINANNOTATOR:PROTEINANNOTATOR:DOMAIN_ANNOTATION:HMMSEARCH_PFAM' {
         ext.args   = { "-E ${params.hmmsearch_evalue_cutoff}" }
         publishDir = [
diff --git a/nextflow.config b/nextflow.config
index af16afb..b96d124 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -28,9 +28,9 @@ params {
     skip_nmpfams            = false
     nmpfams_db              = null
     nmpfams_latest_link     = "https://pavlopoulos-lab.org/envofams/databases/hmmer/nmpfamsdb.hmm.gz"
-    skip_metagroot              = false
-    metagroot_db                = null
-    metagroot_latest_link       = "https://pavlopoulos-lab.org/envofams/databases/hmmer/metagroot.hmm.gz"
+    skip_metagroot          = false
+    metagroot_db            = null
+    metagroot_latest_link   = "https://pavlopoulos-lab.org/envofams/databases/hmmer/metagroot.hmm.gz"
     hmmsearch_evalue_cutoff = 0.001
 
     // Functional annotation
diff --git a/subworkflows/local/domain_annotation/main.nf b/subworkflows/local/domain_annotation/main.nf
index 76692f7..456d620 100644
--- a/subworkflows/local/domain_annotation/main.nf
+++ b/subworkflows/local/domain_annotation/main.nf
@@ -1,11 +1,11 @@
-include { ARIA2 as ARIA2_PFAM                  } from '../../../modules/nf-core/aria2/main'
-include { ARIA2 as ARIA2_FUNFAM                } from '../../../modules/nf-core/aria2/main'
-include { ARIA2 as ARIA2_NMPFAMS               } from '../../../modules/nf-core/aria2/main'
-include { ARIA2 as ARIA2_METAGROOT                 } from '../../../modules/nf-core/aria2/main'
-include { HMMER_HMMSEARCH as HMMSEARCH_PFAM    } from '../../../modules/nf-core/hmmer/hmmsearch/main'
-include { HMMER_HMMSEARCH as HMMSEARCH_FUNFAM  } from '../../../modules/nf-core/hmmer/hmmsearch/main'
-include { HMMER_HMMSEARCH as HMMSEARCH_NMPFAMS } from '../../../modules/nf-core/hmmer/hmmsearch/main'
-include { HMMER_HMMSEARCH as HMMSEARCH_METAGROOT   } from '../../../modules/nf-core/hmmer/hmmsearch/main'
+include { ARIA2 as ARIA2_PFAM                       } from '../../../modules/nf-core/aria2/main'
+include { ARIA2 as ARIA2_FUNFAM                     } from '../../../modules/nf-core/aria2/main'
+include { ARIA2 as ARIA2_NMPFAMS                    } from '../../../modules/nf-core/aria2/main'
+include { ARIA2 as ARIA2_METAGROOT                  } from '../../../modules/nf-core/aria2/main'
+include { HMMER_HMMSEARCH as HMMSEARCH_PFAM         } from '../../../modules/nf-core/hmmer/hmmsearch/main'
+include { HMMER_HMMSEARCH as HMMSEARCH_FUNFAM       } from '../../../modules/nf-core/hmmer/hmmsearch/main'
+include { HMMER_HMMSEARCH as HMMSEARCH_NMPFAMS      } from '../../../modules/nf-core/hmmer/hmmsearch/main'
+include { HMMER_HMMSEARCH as HMMSEARCH_METAGROOT    } from '../../../modules/nf-core/hmmer/hmmsearch/main'
 
 workflow DOMAIN_ANNOTATION {
     take:
@@ -19,17 +19,17 @@ workflow DOMAIN_ANNOTATION {
     skip_nmpfams           // boolean
     nmpfams_db             // string
     nmpfams_latest_link    // string
-    skip_metagroot             // boolean
-    metagroot_db               // string, path to the metagroot HMM database, if already exists
-    metagroot_latest_link      // string, path to the latest metagroot HMM database, to download
+    skip_metagroot         // boolean
+    metagroot_db           // string, path to the metagroot HMM database, if already exists
+    metagroot_latest_link  // string, path to the latest metagroot HMM database, to download
 
     main:
 
-    ch_versions        = channel.empty()
-    ch_pfam_domains    = channel.empty()
-    ch_funfam_domains  = channel.empty()
-    ch_nmpfams_domains = channel.empty()
-    ch_metagroot_domains   = channel.empty()
+    ch_versions             = channel.empty()
+    ch_pfam_domains         = channel.empty()
+    ch_funfam_domains       = channel.empty()
+    ch_nmpfams_domains      = channel.empty()
+    ch_metagroot_domains    = channel.empty()
 
     if (!skip_pfam) {
         if (!pfam_db) {
@@ -112,9 +112,9 @@ workflow DOMAIN_ANNOTATION {
     }
 
     emit:
-    pfam_domains    = ch_pfam_domains
-    funfam_domains  = ch_funfam_domains
-    nmpfams_domains = ch_nmpfams_domains
+    pfam_domains        = ch_pfam_domains
+    funfam_domains      = ch_funfam_domains
+    nmpfams_domains     = ch_nmpfams_domains
     metagroot_domains   = ch_metagroot_domains
-    versions        = ch_versions
+    versions            = ch_versions
 }

From a06b808d79aedca7f20447fbcbc645f964d6fd72 Mon Sep 17 00:00:00 2001
From: angelphanth <angel.phanth@hotmail.com>
Date: Mon, 30 Mar 2026 17:54:07 +0100
Subject: [PATCH 06/14] remove nf-core/module wget

---
 modules.json                                 |  5 --
 modules/nf-core/wget/environment.yml         |  7 --
 modules/nf-core/wget/main.nf                 | 48 --------------
 modules/nf-core/wget/meta.yml                | 52 ---------------
 modules/nf-core/wget/tests/main.nf.test      | 62 -----------------
 modules/nf-core/wget/tests/main.nf.test.snap | 70 --------------------
 modules/nf-core/wget/tests/nextflow.config   |  6 --
 7 files changed, 250 deletions(-)
 delete mode 100644 modules/nf-core/wget/environment.yml
 delete mode 100644 modules/nf-core/wget/main.nf
 delete mode 100644 modules/nf-core/wget/meta.yml
 delete mode 100644 modules/nf-core/wget/tests/main.nf.test
 delete mode 100644 modules/nf-core/wget/tests/main.nf.test.snap
 delete mode 100644 modules/nf-core/wget/tests/nextflow.config

diff --git a/modules.json b/modules.json
index 6a31b58..37ba5b8 100644
--- a/modules.json
+++ b/modules.json
@@ -59,11 +59,6 @@
                         "branch": "master",
                         "git_sha": "447f7bc0fa41dfc2400c8cad4c0291880dc060cf",
                         "installed_by": ["modules"]
-                    },
-                    "wget": {
-                        "branch": "master",
-                        "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46",
-                        "installed_by": ["modules"]
                     }
                 }
             },
diff --git a/modules/nf-core/wget/environment.yml b/modules/nf-core/wget/environment.yml
deleted file mode 100644
index 9eb304e..0000000
--- a/modules/nf-core/wget/environment.yml
+++ /dev/null
@@ -1,7 +0,0 @@
----
-# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
-channels:
-  - conda-forge
-  - bioconda
-dependencies:
-  - conda-forge::wget=1.21.4
diff --git a/modules/nf-core/wget/main.nf b/modules/nf-core/wget/main.nf
deleted file mode 100644
index 9bc6f15..0000000
--- a/modules/nf-core/wget/main.nf
+++ /dev/null
@@ -1,48 +0,0 @@
-process WGET {
-    tag "$meta.id"
-    label 'process_single'
-
-    conda "${moduleDir}/environment.yml"
-    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/3b/3b54fa9135194c72a18d00db6b399c03248103f87e43ca75e4b50d61179994b3/data':
-        'community.wave.seqera.io/library/wget:1.21.4--8b0fcde81c17be5e' }"
-
-    input:
-    tuple val(meta), val(url)
-
-    output:
-    tuple val(meta), path("${prefix}.${suffix}"), emit: outfile
-    path "versions.yml"                         , emit: versions
-
-    when:
-    task.ext.when == null || task.ext.when
-
-    script:
-    def args = task.ext.args   ?: ''
-    prefix   = task.ext.prefix ?: "${meta.id}"
-    suffix   = task.ext.suffix ?: 'html'
-    """
-    wget \\
-        -O - \\
-        $args \\
-        $url \\
-        > ${prefix}.${suffix}
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        wget: \$(wget --version | head -1 | cut -d ' ' -f 3)
-    END_VERSIONS
-    """
-
-    stub:
-    prefix   = task.ext.prefix ?: "${meta.id}"
-    suffix   = task.ext.suffix ?: 'html'
-    """
-    touch ${prefix}.${suffix}
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        wget: \$(wget --version | head -1 | cut -d ' ' -f 3)
-    END_VERSIONS
-    """
-}
diff --git a/modules/nf-core/wget/meta.yml b/modules/nf-core/wget/meta.yml
deleted file mode 100644
index 56df0af..0000000
--- a/modules/nf-core/wget/meta.yml
+++ /dev/null
@@ -1,52 +0,0 @@
-# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
-name: "wget"
-description: The non-interactive network downloader
-keywords:
-  - "wget"
-  - "download"
-  - "network"
-tools:
-  - "wget":
-      description: "wget is a free utility for non-interactive download of files from
-        the Web."
-      homepage: "https://www.gnu.org/software/wget/"
-      documentation: "https://www.gnu.org/software/wget/manual/wget.html"
-      licence: ["GPL"]
-      identifier: ""
-
-input:
-  - - meta:
-        type: map
-        description: |
-          Groovy Map containing sample information
-          e.g. `[ id:'sample1' ]`
-    - url:
-        type: string
-        description: URL to download
-        pattern: "^https?://*.*"
-
-output:
-  outfile:
-    - - meta:
-          type: map
-          description: |
-            Groovy Map containing sample information
-            e.g. `[ id:'sample1' ]`
-      - ${prefix}.${suffix}:
-          type: file
-          description: Downloaded file
-          pattern: "*.*"
-
-          ontologies: []
-  versions:
-    - versions.yml:
-        type: file
-        description: File containing software versions
-        pattern: "versions.yml"
-
-        ontologies:
-          - edam: http://edamontology.org/format_3750 # YAML
-authors:
-  - "@itrujnara"
-maintainers:
-  - "@itrujnara"
diff --git a/modules/nf-core/wget/tests/main.nf.test b/modules/nf-core/wget/tests/main.nf.test
deleted file mode 100644
index e094288..0000000
--- a/modules/nf-core/wget/tests/main.nf.test
+++ /dev/null
@@ -1,62 +0,0 @@
-// nf-core modules test wget
-nextflow_process {
-
-    name "Test Process WGET"
-    script "../main.nf"
-    process "WGET"
-
-    tag "modules"
-    tag "modules_nfcore"
-    tag "wget"
-
-    test("sarscov2 - gff") {
-
-        config "./nextflow.config"
-
-        when {
-            process {
-                """
-                input[0] = [
-                    [ id:'test' ], // meta map
-                    "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/modules/data/genomics/sarscov2/genome/genome.gff3",
-                ]
-                """
-            }
-        }
-
-        then {
-            assertAll(
-                { assert process.success },
-                { assert snapshot(process.out).match() }
-            )
-        }
-
-    }
-
-    test("sarscov2 - gff - stub") {
-
-        options "-stub"
-
-        config "./nextflow.config"
-
-        when {
-            process {
-                """
-                input[0] = [
-                    [ id:'test', single_end:false ], // meta map
-                    "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/modules/data/genomics/sarscov2/genome/genome.gff3",
-                ]
-                """
-            }
-        }
-
-        then {
-            assertAll(
-                { assert process.success },
-                { assert snapshot(process.out).match() }
-            )
-        }
-
-    }
-
-}
diff --git a/modules/nf-core/wget/tests/main.nf.test.snap b/modules/nf-core/wget/tests/main.nf.test.snap
deleted file mode 100644
index 6c05160..0000000
--- a/modules/nf-core/wget/tests/main.nf.test.snap
+++ /dev/null
@@ -1,70 +0,0 @@
-{
-    "sarscov2 - gff": {
-        "content": [
-            {
-                "0": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.gff3:md5,357435a81a9981a0128e840ebe11051e"
-                    ]
-                ],
-                "1": [
-                    "versions.yml:md5,a747f72db5fc051f64676a0ba6f32f35"
-                ],
-                "outfile": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.gff3:md5,357435a81a9981a0128e840ebe11051e"
-                    ]
-                ],
-                "versions": [
-                    "versions.yml:md5,a747f72db5fc051f64676a0ba6f32f35"
-                ]
-            }
-        ],
-        "meta": {
-            "nf-test": "0.9.0",
-            "nextflow": "24.10.4"
-        },
-        "timestamp": "2025-03-26T12:27:32.67617"
-    },
-    "sarscov2 - gff - stub": {
-        "content": [
-            {
-                "0": [
-                    [
-                        {
-                            "id": "test",
-                            "single_end": false
-                        },
-                        "test.gff3:md5,d41d8cd98f00b204e9800998ecf8427e"
-                    ]
-                ],
-                "1": [
-                    "versions.yml:md5,a747f72db5fc051f64676a0ba6f32f35"
-                ],
-                "outfile": [
-                    [
-                        {
-                            "id": "test",
-                            "single_end": false
-                        },
-                        "test.gff3:md5,d41d8cd98f00b204e9800998ecf8427e"
-                    ]
-                ],
-                "versions": [
-                    "versions.yml:md5,a747f72db5fc051f64676a0ba6f32f35"
-                ]
-            }
-        ],
-        "meta": {
-            "nf-test": "0.9.0",
-            "nextflow": "24.10.4"
-        },
-        "timestamp": "2025-03-26T12:21:06.414955"
-    }
-}
\ No newline at end of file
diff --git a/modules/nf-core/wget/tests/nextflow.config b/modules/nf-core/wget/tests/nextflow.config
deleted file mode 100644
index 236f4e1..0000000
--- a/modules/nf-core/wget/tests/nextflow.config
+++ /dev/null
@@ -1,6 +0,0 @@
-process {
-    withName: "WGET" {
-        ext.prefix = "test"
-        ext.suffix = "gff3"
-    }
-}

From 35c56e9b81ad20952527e3eb5872db1f5d4f83bd Mon Sep 17 00:00:00 2001
From: angelphanth <angel.phanth@hotmail.com>
Date: Mon, 30 Mar 2026 17:54:53 +0100
Subject: [PATCH 07/14] add MetagRoot to docs

---
 CHANGELOG.md   |  1 +
 README.md      |  2 +-
 docs/output.md | 11 +++++++----
 docs/usage.md  |  2 +-
 4 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 202bd64..69eb988 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### `Added`
 
+- [#90](https://github.com/nf-core/proteinannotator/pull/90) - Added the option to download and use the latest `MetagRoot` HMM library (or use path to an existing one) for domain annotation. (by @angelphanth)
 - [#87](https://github.com/nf-core/proteinannotator/pull/87) - Added the option to download and use the latest `NMPFams` HMM library (or use path to an existing one) for domain annotation. (by @npechl)
 - [#85](https://github.com/nf-core/proteinannotator/pull/85) - Added zenodo doi in `nextflow.config`. (by @vagkaratzas)
 
diff --git a/README.md b/README.md
index 2b8f037..f128e0a 100644
--- a/README.md
+++ b/README.md
@@ -37,7 +37,7 @@ Generate input amino acid sequence statistics with ([`SeqFu`](https://github.com
 ### Annotate sequences
 
 1. Conserved domain annotation with ([`hmmer`](https://github.com/EddyRivasLab/hmmer/)) against databases
-   such as [Pfam](https://ftp.ebi.ac.uk/pub/databases/Pfam/), [FunFam](https://download.cathdb.info/cath/releases/all-releases/), and [NMPFams](https://pavlopoulos-lab.org/envofams/databases/hmmer/)
+   such as [Pfam](https://ftp.ebi.ac.uk/pub/databases/Pfam/), [FunFam](https://download.cathdb.info/cath/releases/all-releases/), and [NMPFams and MetagRoot](https://pavlopoulos-lab.org/envofams/databases/hmmer/)
 2. Functional annotation:
    - ([`InterProScan`](https://interproscan-docs.readthedocs.io/en/v5/)) a software tool used to analyze protein sequences by scanning them against the signatures of protein families, domains, and sites in the [InterPro](https://www.ebi.ac.uk/interpro/) database, helping to identify their functional characteristics.
 3. Predict secondary structure compositional features such as α-helices, β-strands and coils with ([`s4pred`](https://github.com/psipred/s4pred))
diff --git a/docs/output.md b/docs/output.md
index 0e6387f..3a8e1ec 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -14,9 +14,9 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d
   - [SeqFu](#seqfu) for input amino acid sequences quality control (QC)
   - [SeqKit](#seqkit) for preprocessing input amino acid sequences (i.e., gap removal, convert to upper case, validate, filter by length, replace special characters such as `/`, and remove duplicate sequences)
 - [Database download](#database-download) Optionally download selected databases for annotation.
-  - [aria2](#aria2) - To optionally download the Pfam, FunFam, NMPFams and/or InterProScan databases through the pipeline.
+  - [aria2](#aria2) - To optionally download the Pfam, FunFam, NMPFams, MetagRoot and/or InterProScan databases through the pipeline.
 - [Domain annotation](#domain-annotation) Annotate proteins with domains from established repositories.
-  - [hmmer](#hmmer) - To optionally match the input sequence to known Pfam, FunFam and/or NMPFams domains through `hmmer/hmmsearch`
+  - [hmmer](#hmmer) - To optionally match the input sequence to known Pfam, FunFam, NMPFams and/or MetagRoot domains through `hmmer/hmmsearch`
 - [Functional annotation](#functional-annotation) Annotate proteins with functional domains
   - [InterProScan](#Interproscan) - Search the InterProScan database for functional domains
 - [s4pred](#s4pred) - Predict secondary structures of sequences, producing amino acid level probabilities of forming an α-helix, a β-strand or a coil.
@@ -73,10 +73,11 @@ The `seqkit` module is used for initial preprocessing (i.e., gap removal, conver
   - `interproscan_test.tar.gz`: (optional) the downloaded InterProScan archive of member databases according to the optional user-provided url
   - `funfam-hmm3-v4_3_0*.lib.gz`: (optional) The latest (v4_3_0) full, or a minimal test, FunFam HMM database that can be downloaded through the pipeline.
   - `nmpfamsdb.hmm.gz`: (optional) The latest full, or a minimal test, NMPFams HMM database that can be downloaded through the pipeline.
+  - `metagroot.hmm.gz`: (optional) The latest full, or a minimal test, MetagRoot HMM database that can be downloaded through the pipeline.
 
 </details>
 
-If the `skip_*` flags (e.g., `skip_pfam`, `skip_funfam`, `skip_nmpfams`, `skip_interproscan`) for each annotation database is set to `true`, or the `*_db` parameter paths (e.g., `pfam_db`, `funfam_db`, `nmpfams_db`, `interproscan_db`) are set (i.e., not `null`), or the run is resumed after a successful database download, then the respective database will not be (re)downloaded. The full database links can be found in the main `nextflow.config` file, while minimal test versions can be found in the `test` and `test_full` profiles (i.e., `conf/test.config`, `conf/test_full.config`).
+If the `skip_*` flag (e.g., `skip_pfam`, `skip_funfam`, `skip_nmpfams`, `skip_metagroot`, `skip_interproscan`) for an annotation database is set to `true`, or its `*_db` parameter path (e.g., `pfam_db`, `funfam_db`, `nmpfams_db`, `metagroot_db`, `interproscan_db`) is set (i.e., not `null`), or the run is resumed after a successful database download, then the respective database will not be (re)downloaded. The full database links can be found in the main `nextflow.config` file, while minimal test versions can be found in the `test` and `test_full` profiles (i.e., `conf/test.config`, `conf/test_full.config`).
 
 [aria2](https://github.com/aria2/aria2/) is a lightweight multi-protocol & multi-source, cross platform download utility operated in command-line. It supports HTTP/HTTPS, FTP, SFTP, BitTorrent and Metalink.
 
@@ -94,10 +95,12 @@ If the `skip_*` flags (e.g., `skip_pfam`, `skip_funfam`, `skip_nmpfams`, `skip_i
     - `<samplename>.domtbl.gz`: `hmmer/hmmsearch` results along parameters info.
   - `nmpfams/`
     - `<samplename>.domtbl.gz`: `hmmer/hmmsearch` results along parameters info.
+  - `metagroot/`
+    - `<samplename>.domtbl.gz`: `hmmer/hmmsearch` results along parameters info.
 
 </details>
 
-Each of the `domain_annotation/` subfolders (e.g., `pfam`, `funfam`, `nmpfams`) contain a `.domtbl.gz` annotation file per input sample, depending on which domain annotation databases were used in the pipeline execution.
+Each of the `domain_annotation/` subfolders (e.g., `pfam`, `funfam`, `nmpfams`, `metagroot`) contains a `.domtbl.gz` annotation file per input sample, depending on which domain annotation databases were used in the pipeline execution.
 
 [hmmer](https://github.com/EddyRivasLab/hmmer) is a fast and flexible alignment trimming tool that keeps phylogenetically informative sites and removes others.
 
diff --git a/docs/usage.md b/docs/usage.md
index 72d53cc..a2c17cc 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -7,7 +7,7 @@
 ## Introduction
 
 **nf-core/proteinannotator** is a bioinformatics pipeline that computes statistics and generates sequence-level annotations for amino acid sequences.
-It takes a protein FASTA file as input and performs conserved domain annotation (using Pfam, FunFam and NMPFams HMM databases), functional annotation (using InterProScan), and secondary structure prediction (using s4pred).
+It takes a protein FASTA file as input and performs conserved domain annotation (using Pfam, FunFam, NMPFams and MetagRoot HMM databases), functional annotation (using InterProScan), and secondary structure prediction (using s4pred).
 Optionally, paths to pre-downloaded databases can be provided to skip the automatic download steps and speed up repeated runs.
 
 ## Samplesheet input

From 38d2c48c54ced72e0bb756f5cb72690fe521922c Mon Sep 17 00:00:00 2001
From: angelphanth <angel.phanth@hotmail.com>
Date: Mon, 30 Mar 2026 17:55:20 +0100
Subject: [PATCH 08/14] align =

---
 conf/test.config      | 6 +++---
 conf/test_full.config | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/conf/test.config b/conf/test.config
index e392923..27bda92 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -25,9 +25,9 @@ params {
     // Input data
     input = params.pipelines_testdata_base_path + 'proteinannotator/samplesheet/samplesheet.csv'
     // Domain annotation
-    pfam_latest_link    = params.pipelines_testdata_base_path + 'proteinannotator/testdata/pfam/Pfam-A_test.hmm.gz'
-    funfam_latest_link  = params.pipelines_testdata_base_path + 'proteinannotator/testdata/funfam/funfam-hmm3-v4_3_0_test.lib.gz'
-    nmpfams_latest_link = params.pipelines_testdata_base_path + 'proteinannotator/testdata/nmpfams/nmpfamsdb_test.hmm.gz'
+    pfam_latest_link        = params.pipelines_testdata_base_path + 'proteinannotator/testdata/pfam/Pfam-A_test.hmm.gz'
+    funfam_latest_link      = params.pipelines_testdata_base_path + 'proteinannotator/testdata/funfam/funfam-hmm3-v4_3_0_test.lib.gz'
+    nmpfams_latest_link     = params.pipelines_testdata_base_path + 'proteinannotator/testdata/nmpfams/nmpfamsdb_test.hmm.gz'
     metagroot_latest_link   = params.pipelines_testdata_base_path + 'proteinannotator/testdata/metagroot/metagrootdb_test.hmm.gz'
     // Functional annotation
     interproscan_db_url       = params.pipelines_testdata_base_path + 'proteinannotator/testdata/interproscan/interproscan_test.tar.gz'
diff --git a/conf/test_full.config b/conf/test_full.config
index 4b63915..02e244f 100644
--- a/conf/test_full.config
+++ b/conf/test_full.config
@@ -17,9 +17,9 @@ params {
     // Input data for full size test
     input = params.pipelines_testdata_base_path + 'proteinannotator/samplesheet/samplesheet.csv'
     // Domain annotation
-    pfam_latest_link    = params.pipelines_testdata_base_path + 'proteinannotator/testdata/pfam/Pfam-A_test.hmm.gz'
-    funfam_latest_link  = params.pipelines_testdata_base_path + 'proteinannotator/testdata/funfam/funfam-hmm3-v4_3_0_test.lib.gz'
-    nmpfams_latest_link = params.pipelines_testdata_base_path + 'proteinannotator/testdata/nmpfams/nmpfamsdb_test.hmm.gz'
+    pfam_latest_link        = params.pipelines_testdata_base_path + 'proteinannotator/testdata/pfam/Pfam-A_test.hmm.gz'
+    funfam_latest_link      = params.pipelines_testdata_base_path + 'proteinannotator/testdata/funfam/funfam-hmm3-v4_3_0_test.lib.gz'
+    nmpfams_latest_link     = params.pipelines_testdata_base_path + 'proteinannotator/testdata/nmpfams/nmpfamsdb_test.hmm.gz'
     metagroot_latest_link   = params.pipelines_testdata_base_path + 'proteinannotator/testdata/metagroot/metagrootdb_test.hmm.gz'
     // Functional annotation
     interproscan_db_url       = params.pipelines_testdata_base_path + 'proteinannotator/testdata/interproscan_test.tar.gz'

From 6a539e95aa9663d7320c7ba513e3cb666254d896 Mon Sep 17 00:00:00 2001
From: angelphanth <angel.phanth@hotmail.com>
Date: Mon, 30 Mar 2026 17:56:00 +0100
Subject: [PATCH 09/14] update snapshot for domain_annotation tests

---
 .../domain_annotation/tests/main.nf.test      |  3 +-
 .../domain_annotation/tests/main.nf.test.snap | 55 ++++++++++++++++++-
 tests/.nftignore                              |  3 +
 3 files changed, 57 insertions(+), 4 deletions(-)

diff --git a/subworkflows/local/domain_annotation/tests/main.nf.test b/subworkflows/local/domain_annotation/tests/main.nf.test
index d96a12f..0dc2598 100644
--- a/subworkflows/local/domain_annotation/tests/main.nf.test
+++ b/subworkflows/local/domain_annotation/tests/main.nf.test
@@ -142,8 +142,7 @@ nextflow_workflow {
             assertAll(
                 { assert workflow.success},
                 { assert snapshot(
-                    path(workflow.out.pfam_domains[0][1]).linesGzip[0..7],
-                    path(workflow.out.funfam_domains[0][1]).linesGzip[0..7],
+                    path(workflow.out.metagroot_domains[0][1]).linesGzip[0..7],
                     workflow.out.versions.collect { path(it).yaml }.unique()
                     ).match()}
             )
diff --git a/subworkflows/local/domain_annotation/tests/main.nf.test.snap b/subworkflows/local/domain_annotation/tests/main.nf.test.snap
index 80ce69a..fd4f74e 100644
--- a/subworkflows/local/domain_annotation/tests/main.nf.test.snap
+++ b/subworkflows/local/domain_annotation/tests/main.nf.test.snap
@@ -1,4 +1,35 @@
 {
+    "faa - metagroot": {
+        "content": [
+            [
+                "#                                                                            --- full sequence --- -------------- this domain -------------   hmm coord   ali coord   env coord",
+                "# target name        accession   tlen query name           accession   qlen   E-value  score  bias   #  of  c-Evalue  i-Evalue  score  bias  from    to  from    to  from    to  acc description of target",
+                "#------------------- ---------- ----- -------------------- ---------- ----- --------- ------ ----- --- --- --------- --------- ------ ----- ----- ----- ----- ----- ----- ----- ---- ---------------------",
+                "T1024                -            408 F101326              -            425   9.3e-13   34.9  26.2   1   1   1.8e-12   3.6e-12   33.0  26.2    13   351    18   340    12   407 0.74 LmrP, , 408 residues|",
+                "T1024                -            408 F226054              -            421   1.3e-13   37.4  26.4   1   1   8.6e-14   1.7e-13   37.0  26.4     2   404     2   404     1   408 0.73 LmrP, , 408 residues|",
+                "T1024                -            408 F240027              -            384   8.4e-10   25.0   5.2   1   1     8e-10   1.6e-09   24.1   5.2    26   163    26   160     6   178 0.88 LmrP, , 408 residues|",
+                "T1024                -            408 F287588              -            413     2e-10   26.9  23.3   1   1   1.6e-10   3.1e-10   26.3  23.3    48   363    42   370    30   406 0.74 LmrP, , 408 residues|",
+                "T1024                -            408 F294204              -            387   3.8e-06   12.8  25.9   1   1   2.8e-06   5.6e-06   12.3  25.9    16   372    41   406    30   408 0.76 LmrP, , 408 residues|"
+            ],
+            [
+                {
+                    "DOMAIN_ANNOTATION:HMMSEARCH_METAGROOT": {
+                        "hmmer": 3.4
+                    }
+                },
+                {
+                    "DOMAIN_ANNOTATION:ARIA2_METAGROOT": {
+                        "aria2": "1.36.0"
+                    }
+                }
+            ]
+        ],
+        "timestamp": "2026-03-30T17:28:28.71093",
+        "meta": {
+            "nf-test": "0.9.4",
+            "nextflow": "25.10.4"
+        }
+    },
     "faa - domain annotation": {
         "content": [
             [
@@ -44,7 +75,7 @@
                 }
             ]
         ],
-        "timestamp": "2026-03-13T14:51:37.636657",
+        "timestamp": "2026-03-30T17:28:01.729059",
         "meta": {
             "nf-test": "0.9.4",
             "nextflow": "25.10.4"
@@ -135,9 +166,19 @@
                     ]
                 ],
                 "3": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.domtbl.gz:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "4": [
+                    "versions.yml:md5,046e5161e3e1dff2ab111ddf4bb27331",
                     "versions.yml:md5,160d4c5a5001cfb4ff57b94fc52b67d9",
                     "versions.yml:md5,1b7d208e42364fb87160693faa4e83b9",
                     "versions.yml:md5,35e41735706132967dd94bb636833a4a",
+                    "versions.yml:md5,55939a7ab71dab922d448cf99472feeb",
                     "versions.yml:md5,9045f482d64e7666e62932b0578b665e",
                     "versions.yml:md5,a74a0c8fcb741e59bc14424f612b8d09",
                     "versions.yml:md5,f1d8a406d3dcb97a7c15e9c810926de1"
@@ -150,6 +191,14 @@
                         "test.domtbl.gz:md5,d41d8cd98f00b204e9800998ecf8427e"
                     ]
                 ],
+                "metagroot_domains": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.domtbl.gz:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
                 "nmpfams_domains": [
                     [
                         {
@@ -167,16 +216,18 @@
                     ]
                 ],
                 "versions": [
+                    "versions.yml:md5,046e5161e3e1dff2ab111ddf4bb27331",
                     "versions.yml:md5,160d4c5a5001cfb4ff57b94fc52b67d9",
                     "versions.yml:md5,1b7d208e42364fb87160693faa4e83b9",
                     "versions.yml:md5,35e41735706132967dd94bb636833a4a",
+                    "versions.yml:md5,55939a7ab71dab922d448cf99472feeb",
                     "versions.yml:md5,9045f482d64e7666e62932b0578b665e",
                     "versions.yml:md5,a74a0c8fcb741e59bc14424f612b8d09",
                     "versions.yml:md5,f1d8a406d3dcb97a7c15e9c810926de1"
                 ]
             }
         ],
-        "timestamp": "2026-03-13T09:45:07.520815",
+        "timestamp": "2026-03-30T17:28:37.677345",
         "meta": {
             "nf-test": "0.9.4",
             "nextflow": "25.10.4"
diff --git a/tests/.nftignore b/tests/.nftignore
index 0b6bd76..6441f81 100644
--- a/tests/.nftignore
+++ b/tests/.nftignore
@@ -17,6 +17,9 @@ domain_annotation/funfam/l_arginase.domtbl.gz
 domain_annotation/nmpfams/T1024.domtbl.gz
 domain_annotation/nmpfams/T1026.domtbl.gz
 domain_annotation/nmpfams/l_arginase.domtbl.gz
+domain_annotation/metagroot/T1024.domtbl.gz
+domain_annotation/metagroot/T1026.domtbl.gz
+domain_annotation/metagroot/l_arginase.domtbl.gz
 functional_annotation/interproscan/T1024/T1024.gff3
 functional_annotation/interproscan/T1024/T1024.tsv
 functional_annotation/interproscan/T1026/T1026.gff3

From 7f8269bb682dfc803d4243162888900b53f40cf4 Mon Sep 17 00:00:00 2001
From: angelphanth <angel.phanth@hotmail.com>
Date: Mon, 30 Mar 2026 18:00:05 +0100
Subject: [PATCH 10/14] updated meta via nf-core pipelines lint

---
 ro-crate-metadata.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ro-crate-metadata.json b/ro-crate-metadata.json
index 5028582..6cb8579 100644
--- a/ro-crate-metadata.json
+++ b/ro-crate-metadata.json
@@ -23,7 +23,7 @@
             "@type": "Dataset",
             "creativeWorkStatus": "InProgress",
             "datePublished": "2026-02-09T13:54:13+00:00",
-            "description": "<h1>\n  <picture>\n    <source media=\"(prefers-color-scheme: dark)\" srcset=\"docs/images/nf-core-proteinannotator_logo_dark.png\">\n    <img alt=\"nf-core/proteinannotator\" src=\"docs/images/nf-core-proteinannotator_logo_light.png\">\n  </picture>\n</h1>\n\n[![Open in GitHub Codespaces](https://img.shields.io/badge/Open_In_GitHub_Codespaces-black?labelColor=grey&logo=github)](https://github.com/codespaces/new/nf-core/proteinannotator)\n[![GitHub Actions CI Status](https://github.com/nf-core/proteinannotator/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/proteinannotator/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/proteinannotator/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/proteinannotator/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/proteinannotator/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.18547735-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.18547735)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.10.0-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.5.2-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.5.2)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with 
singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/proteinannotator)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23proteinannotator-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/proteinannotator)[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/proteinannotator** is a bioinformatics pipeline that computes statistics for protein FASTA inputs and produces protein annotations based on predicted sequence features, including conserved domains, functions, and secondary structure.\n\n<p>\n  <picture>\n    <source media=\"(prefers-color-scheme: dark)\" srcset=\"docs/images/proteinannotator_metromap_dark.png\">\n    <img alt=\"nf-core/proteinannotator\" src=\"docs/images/proteinannotator_metromap_light.png\">\n  </picture>\n</p>\n\n### Check quality and pre-process\n\nGenerate input amino acid sequence statistics with ([`SeqFu`](https://github.com/telatin/seqfu2/)) and pre-process them (i.e., gap removal, convert to upper case, validate, filter by length, replace special characters such as `/`, and remove duplicate sequences) with ([`SeqKit`](https://github.com/shenwei356/seqkit/))\n\n### Annotate sequences\n\n1. 
Conserved domain annotation with ([`hmmer`](https://github.com/EddyRivasLab/hmmer/)) against databases\n   such as [Pfam](https://ftp.ebi.ac.uk/pub/databases/Pfam/), [FunFam](https://download.cathdb.info/cath/releases/all-releases/), and [NMPFams](https://pavlopoulos-lab.org/envofams/databases/hmmer/)\n2. Functional annotation:\n   - ([`InterProScan`](https://interproscan-docs.readthedocs.io/en/v5/)) a software tool used to analyze protein sequences by scanning them against the signatures of protein families, domains, and sites in the [InterPro](https://www.ebi.ac.uk/interpro/) database, helping to identify their functional characteristics.\n3. Predict secondary structure compositional features such as \u03b1-helices, \u03b2-strands and coils with ([`s4pred`](https://github.com/psipred/s4pred))\n4. Present QC stats for input sequences before and after initial pre-processing with ([`MultiQC`](http://multiqc.info/))\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\nid,fasta\nspecies1,species1_proteins.fasta\nspecies2,species2_proteins.fasta\n```\n\nEach row represents a FASTA file of proteins from a single species.\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/proteinannotator \\\n   -profile <docker/singularity/.../institute> \\\n   --input samplesheet.csv \\\n   --outdir <OUTDIR>\n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/proteinannotator/usage) and the [parameter documentation](https://nf-co.re/proteinannotator/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/proteinannotator/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/proteinannotator/output).\n\n## Credits\n\nnf-core/proteinannotator was originally written by Olga Botvinnik and Evangelos Karatzas.\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n- [Michael L Heuer](https://github.com/heuermh)\n- [Edmund Miller](https://github.com/edmundmiller)\n- [Eric Wei](https://github.com/eweizy)\n- [Martin Beracochea](https://github.com/mberacochea)\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#proteinannotator` channel](https://nfcore.slack.com/channels/proteinannotator) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\nIf you use nf-core/proteinannotator for your analysis, please cite it using the following doi: [10.5281/zenodo.18547735](https://doi.org/10.5281/zenodo.18547735)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated 
bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n",
+            "description": "<h1>\n  <picture>\n    <source media=\"(prefers-color-scheme: dark)\" srcset=\"docs/images/nf-core-proteinannotator_logo_dark.png\">\n    <img alt=\"nf-core/proteinannotator\" src=\"docs/images/nf-core-proteinannotator_logo_light.png\">\n  </picture>\n</h1>\n\n[![Open in GitHub Codespaces](https://img.shields.io/badge/Open_In_GitHub_Codespaces-black?labelColor=grey&logo=github)](https://github.com/codespaces/new/nf-core/proteinannotator)\n[![GitHub Actions CI Status](https://github.com/nf-core/proteinannotator/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/proteinannotator/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/proteinannotator/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/proteinannotator/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/proteinannotator/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.18547735-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.18547735)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.10.0-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.5.2-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.5.2)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with 
singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/proteinannotator)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23proteinannotator-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/proteinannotator)[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/proteinannotator** is a bioinformatics pipeline that computes statistics for protein FASTA inputs and produces protein annotations based on predicted sequence features, including conserved domains, functions, and secondary structure.\n\n<p>\n  <picture>\n    <source media=\"(prefers-color-scheme: dark)\" srcset=\"docs/images/proteinannotator_metromap_dark.png\">\n    <img alt=\"nf-core/proteinannotator\" src=\"docs/images/proteinannotator_metromap_light.png\">\n  </picture>\n</p>\n\n### Check quality and pre-process\n\nGenerate input amino acid sequence statistics with ([`SeqFu`](https://github.com/telatin/seqfu2/)) and pre-process them (i.e., gap removal, convert to upper case, validate, filter by length, replace special characters such as `/`, and remove duplicate sequences) with ([`SeqKit`](https://github.com/shenwei356/seqkit/))\n\n### Annotate sequences\n\n1. 
Conserved domain annotation with ([`hmmer`](https://github.com/EddyRivasLab/hmmer/)) against databases\n   such as [Pfam](https://ftp.ebi.ac.uk/pub/databases/Pfam/), [FunFam](https://download.cathdb.info/cath/releases/all-releases/), and [NMPFams and MetagRoot](https://pavlopoulos-lab.org/envofams/databases/hmmer/)\n2. Functional annotation:\n   - ([`InterProScan`](https://interproscan-docs.readthedocs.io/en/v5/)) a software tool used to analyze protein sequences by scanning them against the signatures of protein families, domains, and sites in the [InterPro](https://www.ebi.ac.uk/interpro/) database, helping to identify their functional characteristics.\n3. Predict secondary structure compositional features such as \u03b1-helices, \u03b2-strands and coils with ([`s4pred`](https://github.com/psipred/s4pred))\n4. Present QC stats for input sequences before and after initial pre-processing with ([`MultiQC`](http://multiqc.info/))\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\nid,fasta\nspecies1,species1_proteins.fasta\nspecies2,species2_proteins.fasta\n```\n\nEach row represents a FASTA file of proteins from a single species.\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/proteinannotator \\\n   -profile <docker/singularity/.../institute> \\\n   --input samplesheet.csv \\\n   --outdir <OUTDIR>\n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/proteinannotator/usage) and the [parameter documentation](https://nf-co.re/proteinannotator/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/proteinannotator/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/proteinannotator/output).\n\n## Credits\n\nnf-core/proteinannotator was originally written by Olga Botvinnik and Evangelos Karatzas.\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n- [Michael L Heuer](https://github.com/heuermh)\n- [Edmund Miller](https://github.com/edmundmiller)\n- [Eric Wei](https://github.com/eweizy)\n- [Martin Beracochea](https://github.com/mberacochea)\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#proteinannotator` channel](https://nfcore.slack.com/channels/proteinannotator) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\nIf you use nf-core/proteinannotator for your analysis, please cite it using the following doi: [10.5281/zenodo.18547735](https://doi.org/10.5281/zenodo.18547735)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated 
bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n",
             "hasPart": [
                 {
                     "@id": "main.nf"

From 406b4cf2b72f1d5e3c9a4482ec9dee353f4df909 Mon Sep 17 00:00:00 2001
From: angelphanth <angel.phanth@hotmail.com>
Date: Tue, 31 Mar 2026 09:16:30 +0100
Subject: [PATCH 11/14] add missing skip param

---
 main.nf | 1 +
 1 file changed, 1 insertion(+)

diff --git a/main.nf b/main.nf
index 9f7f19b..e29a062 100644
--- a/main.nf
+++ b/main.nf
@@ -49,6 +49,7 @@ workflow NFCORE_PROTEINANNOTATOR {
         params.skip_nmpfams,
         params.nmpfams_db,
         params.nmpfams_latest_link,
+        params.skip_metagroot,
         params.metagroot_db,
         params.metagroot_latest_link,
         params.skip_interproscan,

From 0ecbfdf762528d8c0635e31242969a086a18f3da Mon Sep 17 00:00:00 2001
From: angelphanth <angel.phanth@hotmail.com>
Date: Tue, 31 Mar 2026 10:40:13 +0100
Subject: [PATCH 12/14] rename metagrootdb_test to metagroot_test

---
 conf/test.config      | 2 +-
 conf/test_full.config | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/conf/test.config b/conf/test.config
index 27bda92..23f4a82 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -28,7 +28,7 @@ params {
     pfam_latest_link        = params.pipelines_testdata_base_path + 'proteinannotator/testdata/pfam/Pfam-A_test.hmm.gz'
     funfam_latest_link      = params.pipelines_testdata_base_path + 'proteinannotator/testdata/funfam/funfam-hmm3-v4_3_0_test.lib.gz'
     nmpfams_latest_link     = params.pipelines_testdata_base_path + 'proteinannotator/testdata/nmpfams/nmpfamsdb_test.hmm.gz'
-    metagroot_latest_link   = params.pipelines_testdata_base_path + 'proteinannotator/testdata/metagroot/metagrootdb_test.hmm.gz'
+    metagroot_latest_link   = params.pipelines_testdata_base_path + 'proteinannotator/testdata/metagroot/metagroot_test.hmm.gz'
     // Functional annotation
     interproscan_db_url       = params.pipelines_testdata_base_path + 'proteinannotator/testdata/interproscan/interproscan_test.tar.gz'
     interproscan_applications = 'Hamap,TIGRFAM,sfld'
diff --git a/conf/test_full.config b/conf/test_full.config
index 02e244f..966ee53 100644
--- a/conf/test_full.config
+++ b/conf/test_full.config
@@ -20,7 +20,7 @@ params {
     pfam_latest_link        = params.pipelines_testdata_base_path + 'proteinannotator/testdata/pfam/Pfam-A_test.hmm.gz'
     funfam_latest_link      = params.pipelines_testdata_base_path + 'proteinannotator/testdata/funfam/funfam-hmm3-v4_3_0_test.lib.gz'
     nmpfams_latest_link     = params.pipelines_testdata_base_path + 'proteinannotator/testdata/nmpfams/nmpfamsdb_test.hmm.gz'
-    metagroot_latest_link   = params.pipelines_testdata_base_path + 'proteinannotator/testdata/metagroot/metagrootdb_test.hmm.gz'
+    metagroot_latest_link   = params.pipelines_testdata_base_path + 'proteinannotator/testdata/metagroot/metagroot_test.hmm.gz'
     // Functional annotation
     interproscan_db_url       = params.pipelines_testdata_base_path + 'proteinannotator/testdata/interproscan_test.tar.gz'
     interproscan_applications = 'Hamap,TIGRFAM,sfld'

From 60f38c7d18a176ef6c0dc54b3454d941c3f15bbd Mon Sep 17 00:00:00 2001
From: vagkaratzas <vagelaros.gee@gmail.com>
Date: Tue, 31 Mar 2026 11:54:17 +0100
Subject: [PATCH 13/14] pipeline-level snapshot updated

---
 tests/default.nf.test.snap | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/tests/default.nf.test.snap b/tests/default.nf.test.snap
index 10d5d5a..48525fe 100644
--- a/tests/default.nf.test.snap
+++ b/tests/default.nf.test.snap
@@ -1,7 +1,7 @@
 {
     "-profile test": {
         "content": [
-            36,
+            40,
             {
                 "ARIA2": {
                     "aria2": "1.36.0"
@@ -9,6 +9,9 @@
                 "ARIA2_FUNFAM": {
                     "aria2": "1.36.0"
                 },
+                "ARIA2_METAGROOT": {
+                    "aria2": "1.36.0"
+                },
                 "ARIA2_NMPFAMS": {
                     "aria2": "1.36.0"
                 },
@@ -18,6 +21,9 @@
                 "HMMSEARCH_FUNFAM": {
                     "hmmer": 3.4
                 },
+                "HMMSEARCH_METAGROOT": {
+                    "hmmer": 3.4
+                },
                 "HMMSEARCH_NMPFAMS": {
                     "hmmer": 3.4
                 },
@@ -58,6 +64,10 @@
                 "domain_annotation/funfam/T1024.domtbl.gz",
                 "domain_annotation/funfam/T1026.domtbl.gz",
                 "domain_annotation/funfam/l_arginase.domtbl.gz",
+                "domain_annotation/metagroot",
+                "domain_annotation/metagroot/T1024.domtbl.gz",
+                "domain_annotation/metagroot/T1026.domtbl.gz",
+                "domain_annotation/metagroot/l_arginase.domtbl.gz",
                 "domain_annotation/nmpfams",
                 "domain_annotation/nmpfams/T1024.domtbl.gz",
                 "domain_annotation/nmpfams/T1026.domtbl.gz",
@@ -88,6 +98,7 @@
                 "downloaded_dbs/interproscan_db/tigrfam/15.0/TIGRFAMs_15.0_HMM.LIB",
                 "downloaded_dbs/interproscan_db/tigrfam/15.0/TIGRFAMs_HMM.LIB",
                 "downloaded_dbs/interproscan_test.tar.gz",
+                "downloaded_dbs/metagroot_test.hmm.gz",
                 "downloaded_dbs/nmpfamsdb_test.hmm.gz",
                 "functional_annotation",
                 "functional_annotation/interproscan",
@@ -193,6 +204,7 @@
                 "TIGRFAMs_15.0_HMM.LIB:md5,64f2b2c9e834b47b17d91bb9a6a0067e",
                 "TIGRFAMs_HMM.LIB:md5,543da3f4b65eed9ec393986c6c6ff0ba",
                 "interproscan_test.tar.gz:md5,cde88c0cd841c84dc1203e64854c762b",
+                "metagroot_test.hmm.gz:md5,d23de95bf39fb6e27ffb266ce61ac98e",
                 "nmpfamsdb_test.hmm.gz:md5,ad7a094618ccfdaeed1c03e93f6abf1e",
                 "T1024.json:md5,0288f7551a14faedc409dd374b3e073e",
                 "T1024.xml:md5,63a3db0eb0e1f76403411602c23b721e",
@@ -232,8 +244,8 @@
         ],
         "meta": {
             "nf-test": "0.9.3",
-            "nextflow": "25.10.2"
+            "nextflow": "25.10.4"
         },
-        "timestamp": "2026-03-14T10:06:42.466898492"
+        "timestamp": "2026-03-31T11:50:46.606922418"
     }
 }
\ No newline at end of file

From 972f149ab8873019043ced03d38cff58d212afa7 Mon Sep 17 00:00:00 2001
From: vagkaratzas <vagelaros.gee@gmail.com>
Date: Tue, 31 Mar 2026 12:03:10 +0100
Subject: [PATCH 14/14] rename MetagRoot to metagRoot

---
 CHANGELOG.md                                  |  2 +-
 README.md                                     |  2 +-
 docs/output.md                                |  6 +++---
 docs/usage.md                                 |  2 +-
 nextflow_schema.json                          | 10 +++++-----
 ro-crate-metadata.json                        |  2 +-
 subworkflows/local/domain_annotation/meta.yml |  6 +++---
 7 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 69eb988..88578a1 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,7 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### `Added`
 
-- [#90](https://github.com/nf-core/proteinannotator/pull/90) - Added the option to download and use the latest `MetagRoot` HMM library (or use path to an existing one) for domain annotation. (by @angelphanth)
+- [#90](https://github.com/nf-core/proteinannotator/pull/90) - Added the option to download and use the latest `metagRoot` HMM library (or use path to an existing one) for domain annotation. (by @angelphanth)
 - [#87](https://github.com/nf-core/proteinannotator/pull/87) - Added the option to download and use the latest `NMPFams` HMM library (or use path to an existing one) for domain annotation. (by @npechl)
 - [#85](https://github.com/nf-core/proteinannotator/pull/85) - Added zenodo doi in `nextflow.config`. (by @vagkaratzas)
 
diff --git a/README.md b/README.md
index f128e0a..dac12bd 100644
--- a/README.md
+++ b/README.md
@@ -37,7 +37,7 @@ Generate input amino acid sequence statistics with ([`SeqFu`](https://github.com
 ### Annotate sequences
 
 1. Conserved domain annotation with ([`hmmer`](https://github.com/EddyRivasLab/hmmer/)) against databases
-   such as [Pfam](https://ftp.ebi.ac.uk/pub/databases/Pfam/), [FunFam](https://download.cathdb.info/cath/releases/all-releases/), and [NMPFams and MetagRoot](https://pavlopoulos-lab.org/envofams/databases/hmmer/)
+   such as [Pfam](https://ftp.ebi.ac.uk/pub/databases/Pfam/), [FunFam](https://download.cathdb.info/cath/releases/all-releases/), and [NMPFams and metagRoot](https://pavlopoulos-lab.org/envofams/databases/hmmer/)
 2. Functional annotation:
    - ([`InterProScan`](https://interproscan-docs.readthedocs.io/en/v5/)) a software tool used to analyze protein sequences by scanning them against the signatures of protein families, domains, and sites in the [InterPro](https://www.ebi.ac.uk/interpro/) database, helping to identify their functional characteristics.
 3. Predict secondary structure compositional features such as α-helices, β-strands and coils with ([`s4pred`](https://github.com/psipred/s4pred))
diff --git a/docs/output.md b/docs/output.md
index 3a8e1ec..85e3439 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -14,9 +14,9 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d
   - [SeqFu](#seqfu) for input amino acid sequences quality control (QC)
   - [SeqKit](#seqkit) for preprocessing input amino acid sequences (i.e., gap removal, convert to upper case, validate, filter by length, replace special characters such as `/`, and remove duplicate sequences)
 - [Database download](#database-download) Optionally download selected databases for annotation.
-  - [aria2](#aria2) - To optionally download the Pfam, FunFam, NMPFams, MetagRoot and/or InterProScan databases through the pipeline.
+  - [aria2](#aria2) - To optionally download the Pfam, FunFam, NMPFams, metagRoot and/or InterProScan databases through the pipeline.
 - [Domain annotation](#domain-annotation) Annotate proteins with domains from established repositories.
-  - [hmmer](#hmmer) - To optionally match the input sequence to known Pfam, FunFam, NMPFams and/or MetagRoot domains through `hmmer/hmmsearch`
+  - [hmmer](#hmmer) - To optionally match the input sequence to known Pfam, FunFam, NMPFams and/or metagRoot domains through `hmmer/hmmsearch`
 - [Functional annotation](#functional-annotation) Annotate proteins with functional domains
   - [InterProScan](#Interproscan) - Search the InterProScan database for functional domains
 - [s4pred](#s4pred) - Predict secondary structures of sequences, producing amino acid level probabilities of forming an α-helix, a β-strand or a coil.
@@ -73,7 +73,7 @@ The `seqkit` module is used for initial preprocessing (i.e., gap removal, conver
   - `interproscan_test.tar.gz`: (optional) the downloaded InterProScan archive of member databases according to the optional user-provided url
   - `funfam-hmm3-v4_3_0*.lib.gz`: (optional) The latest (v4_3_0) full, or a minimal test, FunFam HMM database that can be downloaded through the pipeline.
   - `nmpfamsdb.hmm.gz`: (optional) The latest full, or a minimal test, NMPFams HMM database that can be downloaded through the pipeline.
-  - `metagroot.hmm.gz`: (optional) The latest full, or a minimal test, MetagRoot HMM database that can be downloaded through the pipeline.
+  - `metagroot.hmm.gz`: (optional) The latest full, or a minimal test, metagRoot HMM database that can be downloaded through the pipeline.
 
 </details>
 
diff --git a/docs/usage.md b/docs/usage.md
index a2c17cc..8978d1e 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -7,7 +7,7 @@
 ## Introduction
 
 **nf-core/proteinannotator** is a bioinformatics pipeline that computes statistics and generates sequence-level annotations for amino acid sequences.
-It takes a protein FASTA file as input and performs conserved domain annotation (using Pfam, FunFam, NMPFams and MetagRoot HMM databases), functional annotation (using InterProScan), and secondary structure prediction (using s4pred).
+It takes a protein FASTA file as input and performs conserved domain annotation (using Pfam, FunFam, NMPFams and metagRoot HMM databases), functional annotation (using InterProScan), and secondary structure prediction (using s4pred).
 Optionally, paths to pre-downloaded databases can be provided to skip the automatic download steps and speed up repeated runs.
 
 ## Samplesheet input
diff --git a/nextflow_schema.json b/nextflow_schema.json
index ffd915d..1d79c7a 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -296,19 +296,19 @@
                 "skip_metagroot": {
                     "type": "boolean",
                     "fa_icon": "fas fa-ban",
-                    "description": "Skip the domain annotation with the MetagRoot database.",
-                    "help": "Skips the domain annotation of input sequence against a MetagRoot database."
+                    "description": "Skip the domain annotation with the metagRoot database.",
+                    "help_text": "Skips the domain annotation of input sequences against a metagRoot database."
                 },
                 "metagroot_db": {
                     "type": "string",
                     "format": "file-path",
-                    "description": "Path to an already installed MetagRoot HMM database (.hmm.gz).",
-                    "help_text": "If left null and skip_metagroot is false, the pipeline will start downloading the latest MetagRoot HMM library."
+                    "description": "Path to an already installed metagRoot HMM database (.hmm.gz).",
+                    "help_text": "If left null and skip_metagroot is false, the pipeline will start downloading the latest metagRoot HMM library."
                 },
                 "metagroot_latest_link": {
                     "type": "string",
                     "default": "https://pavlopoulos-lab.org/envofams/databases/hmmer/metagroot.hmm.gz",
-                    "description": "MetagRoot hosted link to the latest available MetagRoot HMM database file."
+                    "description": "metagRoot hosted link to the latest available metagRoot HMM database file."
                 },
                 "hmmsearch_evalue_cutoff": {
                     "type": "number",
diff --git a/ro-crate-metadata.json b/ro-crate-metadata.json
index 6cb8579..e3f06a1 100644
--- a/ro-crate-metadata.json
+++ b/ro-crate-metadata.json
@@ -23,7 +23,7 @@
             "@type": "Dataset",
             "creativeWorkStatus": "InProgress",
             "datePublished": "2026-02-09T13:54:13+00:00",
-            "description": "<h1>\n  <picture>\n    <source media=\"(prefers-color-scheme: dark)\" srcset=\"docs/images/nf-core-proteinannotator_logo_dark.png\">\n    <img alt=\"nf-core/proteinannotator\" src=\"docs/images/nf-core-proteinannotator_logo_light.png\">\n  </picture>\n</h1>\n\n[![Open in GitHub Codespaces](https://img.shields.io/badge/Open_In_GitHub_Codespaces-black?labelColor=grey&logo=github)](https://github.com/codespaces/new/nf-core/proteinannotator)\n[![GitHub Actions CI Status](https://github.com/nf-core/proteinannotator/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/proteinannotator/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/proteinannotator/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/proteinannotator/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/proteinannotator/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.18547735-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.18547735)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.10.0-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.5.2-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.5.2)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with 
singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/proteinannotator)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23proteinannotator-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/proteinannotator)[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/proteinannotator** is a bioinformatics pipeline that computes statistics for protein FASTA inputs and produces protein annotations based on predicted sequence features, including conserved domains, functions, and secondary structure.\n\n<p>\n  <picture>\n    <source media=\"(prefers-color-scheme: dark)\" srcset=\"docs/images/proteinannotator_metromap_dark.png\">\n    <img alt=\"nf-core/proteinannotator\" src=\"docs/images/proteinannotator_metromap_light.png\">\n  </picture>\n</p>\n\n### Check quality and pre-process\n\nGenerate input amino acid sequence statistics with ([`SeqFu`](https://github.com/telatin/seqfu2/)) and pre-process them (i.e., gap removal, convert to upper case, validate, filter by length, replace special characters such as `/`, and remove duplicate sequences) with ([`SeqKit`](https://github.com/shenwei356/seqkit/))\n\n### Annotate sequences\n\n1. 
Conserved domain annotation with ([`hmmer`](https://github.com/EddyRivasLab/hmmer/)) against databases\n   such as [Pfam](https://ftp.ebi.ac.uk/pub/databases/Pfam/), [FunFam](https://download.cathdb.info/cath/releases/all-releases/), and [NMPFams and MetagRoot](https://pavlopoulos-lab.org/envofams/databases/hmmer/)\n2. Functional annotation:\n   - ([`InterProScan`](https://interproscan-docs.readthedocs.io/en/v5/)) a software tool used to analyze protein sequences by scanning them against the signatures of protein families, domains, and sites in the [InterPro](https://www.ebi.ac.uk/interpro/) database, helping to identify their functional characteristics.\n3. Predict secondary structure compositional features such as \u03b1-helices, \u03b2-strands and coils with ([`s4pred`](https://github.com/psipred/s4pred))\n4. Present QC stats for input sequences before and after initial pre-processing with ([`MultiQC`](http://multiqc.info/))\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\nid,fasta\nspecies1,species1_proteins.fasta\nspecies2,species2_proteins.fasta\n```\n\nEach row represents a FASTA file of proteins from a single species.\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/proteinannotator \\\n   -profile <docker/singularity/.../institute> \\\n   --input samplesheet.csv \\\n   --outdir <OUTDIR>\n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/proteinannotator/usage) and the [parameter documentation](https://nf-co.re/proteinannotator/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/proteinannotator/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/proteinannotator/output).\n\n## Credits\n\nnf-core/proteinannotator was originally written by Olga Botvinnik and Evangelos Karatzas.\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n- [Michael L Heuer](https://github.com/heuermh)\n- [Edmund Miller](https://github.com/edmundmiller)\n- [Eric Wei](https://github.com/eweizy)\n- [Martin Beracochea](https://github.com/mberacochea)\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#proteinannotator` channel](https://nfcore.slack.com/channels/proteinannotator) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\nIf you use nf-core/proteinannotator for your analysis, please cite it using the following doi: [10.5281/zenodo.18547735](https://doi.org/10.5281/zenodo.18547735)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated 
bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n",
+            "description": "<h1>\n  <picture>\n    <source media=\"(prefers-color-scheme: dark)\" srcset=\"docs/images/nf-core-proteinannotator_logo_dark.png\">\n    <img alt=\"nf-core/proteinannotator\" src=\"docs/images/nf-core-proteinannotator_logo_light.png\">\n  </picture>\n</h1>\n\n[![Open in GitHub Codespaces](https://img.shields.io/badge/Open_In_GitHub_Codespaces-black?labelColor=grey&logo=github)](https://github.com/codespaces/new/nf-core/proteinannotator)\n[![GitHub Actions CI Status](https://github.com/nf-core/proteinannotator/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/proteinannotator/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/proteinannotator/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/proteinannotator/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/proteinannotator/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.18547735-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.18547735)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.10.0-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.5.2-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.5.2)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with 
singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/proteinannotator)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23proteinannotator-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/proteinannotator)[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/proteinannotator** is a bioinformatics pipeline that computes statistics for protein FASTA inputs and produces protein annotations based on predicted sequence features, including conserved domains, functions, and secondary structure.\n\n<p>\n  <picture>\n    <source media=\"(prefers-color-scheme: dark)\" srcset=\"docs/images/proteinannotator_metromap_dark.png\">\n    <img alt=\"nf-core/proteinannotator\" src=\"docs/images/proteinannotator_metromap_light.png\">\n  </picture>\n</p>\n\n### Check quality and pre-process\n\nGenerate input amino acid sequence statistics with ([`SeqFu`](https://github.com/telatin/seqfu2/)) and pre-process them (i.e., gap removal, convert to upper case, validate, filter by length, replace special characters such as `/`, and remove duplicate sequences) with ([`SeqKit`](https://github.com/shenwei356/seqkit/))\n\n### Annotate sequences\n\n1. 
Conserved domain annotation with ([`hmmer`](https://github.com/EddyRivasLab/hmmer/)) against databases\n   such as [Pfam](https://ftp.ebi.ac.uk/pub/databases/Pfam/), [FunFam](https://download.cathdb.info/cath/releases/all-releases/), and [NMPFams and metagRoot](https://pavlopoulos-lab.org/envofams/databases/hmmer/)\n2. Functional annotation:\n   - ([`InterProScan`](https://interproscan-docs.readthedocs.io/en/v5/)) a software tool used to analyze protein sequences by scanning them against the signatures of protein families, domains, and sites in the [InterPro](https://www.ebi.ac.uk/interpro/) database, helping to identify their functional characteristics.\n3. Predict secondary structure compositional features such as \u03b1-helices, \u03b2-strands and coils with ([`s4pred`](https://github.com/psipred/s4pred))\n4. Present QC stats for input sequences before and after initial pre-processing with ([`MultiQC`](http://multiqc.info/))\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\nid,fasta\nspecies1,species1_proteins.fasta\nspecies2,species2_proteins.fasta\n```\n\nEach row represents a FASTA file of proteins from a single species.\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/proteinannotator \\\n   -profile <docker/singularity/.../institute> \\\n   --input samplesheet.csv \\\n   --outdir <OUTDIR>\n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/proteinannotator/usage) and the [parameter documentation](https://nf-co.re/proteinannotator/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/proteinannotator/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/proteinannotator/output).\n\n## Credits\n\nnf-core/proteinannotator was originally written by Olga Botvinnik and Evangelos Karatzas.\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n- [Michael L Heuer](https://github.com/heuermh)\n- [Edmund Miller](https://github.com/edmundmiller)\n- [Eric Wei](https://github.com/eweizy)\n- [Martin Beracochea](https://github.com/mberacochea)\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#proteinannotator` channel](https://nfcore.slack.com/channels/proteinannotator) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\nIf you use nf-core/proteinannotator for your analysis, please cite it using the following doi: [10.5281/zenodo.18547735](https://doi.org/10.5281/zenodo.18547735)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated 
bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n",
             "hasPart": [
                 {
                     "@id": "main.nf"
diff --git a/subworkflows/local/domain_annotation/meta.yml b/subworkflows/local/domain_annotation/meta.yml
index b60228b..630237b 100644
--- a/subworkflows/local/domain_annotation/meta.yml
+++ b/subworkflows/local/domain_annotation/meta.yml
@@ -57,15 +57,15 @@ input:
   - skip_metagroot:
       type: boolean
       description: |
-        Skip domain annotation with MetagRoot
+        Skip domain annotation with metagRoot
   - metagroot_db:
       type: string
       description: |
-        Path to an existing HMM MetagRoot library on the system. If provided, the ARIA2_METAGROOT db download will be skipped.
+        Path to an existing HMM metagRoot library on the system. If provided, the ARIA2_METAGROOT db download will be skipped.
   - metagroot_latest_link:
       type: string
       description: |
-        Path to the latest MetagRoot HMM database, to download
+        URL of the latest metagRoot HMM database to download
 output:
   - pfam_domains:
       type: file