From 2ad495f1cb19324660d4569380cffe17ab2ffc62 Mon Sep 17 00:00:00 2001 From: angelphanth Date: Thu, 12 Mar 2026 16:50:26 +0000 Subject: [PATCH 01/14] copying integration of pfam and funfam --- conf/modules.config | 22 ++++++++++++ conf/test.config | 1 + main.nf | 3 ++ nextflow.config | 3 ++ nextflow_schema.json | 17 +++++++++ subworkflows/local/domain_annotation/main.nf | 36 +++++++++++++++++++ subworkflows/local/domain_annotation/meta.yml | 16 +++++++++ .../main.nf | 4 +-- workflows/proteinannotator.nf | 8 ++++- 9 files changed, 107 insertions(+), 3 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index ec1428c..9f40a6d 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -90,6 +90,17 @@ process { ] } + withName: 'NFCORE_PROTEINANNOTATOR:PROTEINANNOTATOR:DOMAIN_ANNOTATION:WGET_MROOT' { + ext.prefix = "HMM" + ext.suffix = "tar.gz" + ext.args = '--no-check-certificate' // explicitly naming output + publishDir = [ + path: { "${params.outdir}/downloaded_dbs/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: 'NFCORE_PROTEINANNOTATOR:PROTEINANNOTATOR:DOMAIN_ANNOTATION:HMMSEARCH_PFAM' { ext.args = { "-E ${params.hmmsearch_evalue_cutoff}" } publishDir = [ @@ -110,6 +121,17 @@ process { ] } + withName: 'NFCORE_PROTEINANNOTATOR:PROTEINANNOTATOR:DOMAIN_ANNOTATION:HMMSEARCH_MROOT' { + ext.args = { "-E ${params.hmmsearch_evalue_cutoff}" } + publishDir = [ + path: { "${params.outdir}/domain_annotation/mroot/" }, + mode: params.publish_dir_mode, + pattern: "*.domtbl.gz", + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + + withName: 'NFCORE_PROTEINANNOTATOR:PROTEINANNOTATOR:FUNCTIONAL_ANNOTATION:ARIA2' { publishDir = [ path: { "${params.outdir}/downloaded_dbs/" }, diff --git a/conf/test.config b/conf/test.config index 252ec87..02c92be 100644 --- a/conf/test.config +++ b/conf/test.config @@ -27,6 +27,7 @@ params { // Domain annotation pfam_latest_link = params.pipelines_testdata_base_path + 'proteinannotator/testdata/pfam/Pfam-A_test.hmm.gz' funfam_latest_link = params.pipelines_testdata_base_path + 'proteinannotator/testdata/funfam/funfam-hmm3-v4_3_0_test.lib.gz' + mroot_latest_link = 'https://pavlopoulos-lab.org/metagroot/DownloadHmm' // Functional annotation interproscan_db_url = params.pipelines_testdata_base_path + 'proteinannotator/testdata/interproscan/interproscan_test.tar.gz' interproscan_applications = 'Hamap,TIGRFAM,sfld' diff --git a/main.nf b/main.nf index 98d7d67..d7f1972 100644 --- a/main.nf +++ b/main.nf @@ -46,6 +46,9 @@ workflow NFCORE_PROTEINANNOTATOR { params.skip_funfam, params.funfam_db, params.funfam_latest_link, + params.skip_mroot, + params.mroot_db, + params.mroot_latest_link, params.skip_interproscan, params.interproscan_db_url, params.interproscan_db, diff --git a/nextflow.config b/nextflow.config index e56f91f..f83d29d 100644 --- a/nextflow.config +++ b/nextflow.config @@ -25,6 +25,9 @@ params { skip_funfam = false funfam_db = null funfam_latest_link = "https://download.cathdb.info/cath/releases/all-releases/v4_3_0/sequence-data/funfam-hmm3-v4_3_0.lib.gz" + skip_mroot = false + mroot_db = null + mroot_latest_link = "https://pavlopoulos-lab.org/metagroot/DownloadHmm" hmmsearch_evalue_cutoff = 0.001 // Functional annotation diff --git a/nextflow_schema.json b/nextflow_schema.json index b7ad6d8..754cd6c 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -276,6 +276,23 @@ "default": "https://download.cathdb.info/cath/releases/all-releases/v4_3_0/sequence-data/funfam-hmm3-v4_3_0.lib.gz", "description": "CATH hosted 
link to the latest available (v4_3_0) FunFam HMM database file." }, + "skip_mroot": { + "type": "boolean", + "fa_icon": "fas fa-ban", + "description": "Skip the domain annotation with the MetagRoot database.", + "help": "Skips the domain annotation of input sequence against a MetagRoot database." + }, + "mroot_db": { + "type": "string", + "format": "file-path", + "description": "Path to an already installed MetagRoot HMM database (.tar.gz).", + "help_text": "If left null and skip_mroot is false, the pipeline will start downloading the latest MetagRoot HMM library." + }, + "mroot_latest_link": { + "type": "string", + "default": "https://pavlopoulos-lab.org/metagroot/DownloadHmm", + "description": "MetagRoot hosted link to the latest available MetagRoot HMM database file." + }, "hmmsearch_evalue_cutoff": { "type": "number", "default": 0.001, diff --git a/subworkflows/local/domain_annotation/main.nf b/subworkflows/local/domain_annotation/main.nf index 1ec8289..4e76a56 100644 --- a/subworkflows/local/domain_annotation/main.nf +++ b/subworkflows/local/domain_annotation/main.nf @@ -1,7 +1,10 @@ include { ARIA2 as ARIA2_PFAM } from '../../../modules/nf-core/aria2/main' include { ARIA2 as ARIA2_FUNFAM } from '../../../modules/nf-core/aria2/main' +include { WGET as WGET_MROOT } from '../../../modules/nf-core/wget/main' include { HMMER_HMMSEARCH as HMMSEARCH_PFAM } from '../../../modules/nf-core/hmmer/hmmsearch/main' include { HMMER_HMMSEARCH as HMMSEARCH_FUNFAM } from '../../../modules/nf-core/hmmer/hmmsearch/main' +include { HMMER_HMMSEARCH as HMMSEARCH_MROOT } from '../../../modules/nf-core/hmmer/hmmsearch/main' +include { UNTAR as UNTAR_MROOT } from '../../../modules/nf-core/untar/main' workflow DOMAIN_ANNOTATION { take: @@ -12,12 +15,16 @@ workflow DOMAIN_ANNOTATION { skip_funfam // boolean funfam_db // string, path to the funfam HMM database, if already exists funfam_latest_link // string, path to the latest funfam HMM database, to download + skip_mroot // boolean + 
mroot_db // string, path to the metagroot HMM database, if already exists + mroot_latest_link // string, path to the latest metagroot HMM database, to download main: ch_versions = channel.empty() ch_pfam_domains = channel.empty() ch_funfam_domains = channel.empty() + ch_mroot_domains = channel.empty() if (!skip_pfam) { if (!pfam_db) { @@ -59,8 +66,37 @@ workflow DOMAIN_ANNOTATION { ch_funfam_domains = HMMSEARCH_FUNFAM.out.domain_summary } + if (!skip_mroot) { + if (!mroot_db) { + ch_mroot_link = channel.of([ [ id: 'mroot' ], mroot_latest_link ]) + // download file from url + WGET_MROOT( ch_mroot_link ) + // untar file if its a tar.gz + UNTAR_MROOT( WGET_MROOT.out.outfile ) + // extract hmm files from dir + ch_mroot_db = UNTAR_MROOT.out.untar + .map { + meta, dir -> + // collect all .hmm files from dir + def hmm_files = file("${dir}/**/*.hmm") + tuple(meta, hmm_files) + } + } else { + ch_mroot_db = channel.of([ [ id: 'mroot' ], mroot_db ]) + } + + ch_input_for_hmmsearch_mroot = ch_fasta + .combine(ch_mroot_db) + .map{ meta, seqs, _meta2, models -> [meta, models, seqs, false, false, true] } + + HMMSEARCH_MROOT( ch_input_for_hmmsearch_mroot ) + ch_versions = ch_versions.mix( HMMSEARCH_MROOT.out.versions.first() ) + ch_mroot_domains = HMMSEARCH_MROOT.out.domain_summary + } + emit: pfam_domains = ch_pfam_domains funfam_domains = ch_funfam_domains + mroot_domains = ch_mroot_domains versions = ch_versions } diff --git a/subworkflows/local/domain_annotation/meta.yml b/subworkflows/local/domain_annotation/meta.yml index e04e241..ad37de9 100644 --- a/subworkflows/local/domain_annotation/meta.yml +++ b/subworkflows/local/domain_annotation/meta.yml @@ -42,6 +42,18 @@ input: type: string description: | Path to the latest FunFam HMM database, to download + - skip_mroot: + type: boolean + description: | + Skip domain annotation with MetagRoot + - mroot_db: + type: string + description: | + Path to an existing HMM MetagRoot library on the system. 
If provided, the ARIA2_METAGROOT db download will be skipped. + - mroot_latest_link: + type: string + description: | + Path to the latest MetagRoot HMM database, to download output: - pfam_domains: type: file @@ -51,6 +63,10 @@ output: type: file description: | domtbl.gz files with funfam domain annotation for input amino acid sequences + - mroot_domains: + type: file + description: | + domtbl.gz files with metagroot domain annotation for input amino acid sequences - versions: type: file description: | diff --git a/subworkflows/local/utils_nfcore_proteinannotator_pipeline/main.nf b/subworkflows/local/utils_nfcore_proteinannotator_pipeline/main.nf index 1ba3ccc..7ef2d1a 100644 --- a/subworkflows/local/utils_nfcore_proteinannotator_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_proteinannotator_pipeline/main.nf @@ -180,7 +180,7 @@ def toolCitationText() { params.skip_preprocessing ? "" : "Input sequences were preprocessed with SeqKit (gap trimming, length filtering, validation, duplicate removal) (Shen et al. 2024)." ].join(' ').trim() - def domain_annotation_text = (params.skip_pfam && params.skip_funfam) ? "" : "Domains were annotated with hmmer/hmmsearch (Eddy et al. 2011)." + def domain_annotation_text = (params.skip_pfam && params.skip_funfam && params.skip_mroot) ? "" : "Domains were annotated with hmmer/hmmsearch (Eddy et al. 2011)." def prediction_text = params.skip_s4pred ? "" : "Secondary structures were predicted via the s4pred software (Moffat et al. 2021)." @@ -202,7 +202,7 @@ def toolBibliographyText() { params.skip_preprocessing ? '' : '
  • Shen, W., Sipos, B., & Zhao, L. (2024). SeqKit2: A Swiss army knife for sequence and alignment processing. Imeta, 3(3), e191. doi: 10.1002/imt2.191
  • ' ].join(' ').trim() - def domain_annotation_text = (params.skip_pfam && params.skip_funfam) ? '' : '
  • Eddy, S. R. (2011). Accelerated profile HMM searches. PLoS computational biology, 7(10), e1002195. doi: 10.1371/journal.pcbi.1002195
  • ' + def domain_annotation_text = (params.skip_pfam && params.skip_funfam && params.skip_mroot) ? '' : '
  • Eddy, S. R. (2011). Accelerated profile HMM searches. PLoS computational biology, 7(10), e1002195. doi: 10.1371/journal.pcbi.1002195
  • ' def prediction_text = params.skip_s4pred ? '' : '
  • Moffat, L., & Jones, D. T. (2021). Increasing the accuracy of single sequence prediction methods using a deep semi-supervised learning framework. Bioinformatics, 37(21), 3744-3751. doi: 10.1093/bioinformatics/btab491
  • ' diff --git a/workflows/proteinannotator.nf b/workflows/proteinannotator.nf index fae1d7a..4398d69 100644 --- a/workflows/proteinannotator.nf +++ b/workflows/proteinannotator.nf @@ -29,6 +29,9 @@ workflow PROTEINANNOTATOR { skip_funfam // boolean funfam_db // string, path to the pfam HMM database, if already exists funfam_latest_link // string, path to the latest pfam HMM database, to download + skip_mroot // boolean + mroot_db // string, path to the metagroot HMM database, if already exists + mroot_latest_link // string, path to the latest metagroot HMM database, to download skip_interproscan // boolean interproscan_db_url // string, url to download db interproscan_db // string, existing db @@ -49,7 +52,10 @@ workflow PROTEINANNOTATOR { pfam_latest_link, skip_funfam, funfam_db, - funfam_latest_link + funfam_latest_link, + skip_mroot, + mroot_db, + mroot_latest_link ) ch_versions = ch_versions.mix( DOMAIN_ANNOTATION.out.versions ) From 63fd565dcbe9a311932fe8feff14a960bdaadc38 Mon Sep 17 00:00:00 2001 From: angelphanth Date: Thu, 12 Mar 2026 16:53:15 +0000 Subject: [PATCH 02/14] nf-core modules wget and untar as a workaround for aria2 to get metagroot tar file? 
--- modules.json | 5 ++ modules/nf-core/wget/environment.yml | 7 ++ modules/nf-core/wget/main.nf | 48 ++++++++++++++ modules/nf-core/wget/meta.yml | 52 +++++++++++++++ modules/nf-core/wget/tests/main.nf.test | 62 +++++++++++++++++ modules/nf-core/wget/tests/main.nf.test.snap | 70 ++++++++++++++++++++ modules/nf-core/wget/tests/nextflow.config | 6 ++ 7 files changed, 250 insertions(+) create mode 100644 modules/nf-core/wget/environment.yml create mode 100644 modules/nf-core/wget/main.nf create mode 100644 modules/nf-core/wget/meta.yml create mode 100644 modules/nf-core/wget/tests/main.nf.test create mode 100644 modules/nf-core/wget/tests/main.nf.test.snap create mode 100644 modules/nf-core/wget/tests/nextflow.config diff --git a/modules.json b/modules.json index 37ba5b8..6a31b58 100644 --- a/modules.json +++ b/modules.json @@ -59,6 +59,11 @@ "branch": "master", "git_sha": "447f7bc0fa41dfc2400c8cad4c0291880dc060cf", "installed_by": ["modules"] + }, + "wget": { + "branch": "master", + "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46", + "installed_by": ["modules"] } } }, diff --git a/modules/nf-core/wget/environment.yml b/modules/nf-core/wget/environment.yml new file mode 100644 index 0000000..9eb304e --- /dev/null +++ b/modules/nf-core/wget/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - conda-forge::wget=1.21.4 diff --git a/modules/nf-core/wget/main.nf b/modules/nf-core/wget/main.nf new file mode 100644 index 0000000..9bc6f15 --- /dev/null +++ b/modules/nf-core/wget/main.nf @@ -0,0 +1,48 @@ +process WGET { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/3b/3b54fa9135194c72a18d00db6b399c03248103f87e43ca75e4b50d61179994b3/data': + 'community.wave.seqera.io/library/wget:1.21.4--8b0fcde81c17be5e' }" + + input: + tuple val(meta), val(url) + + output: + tuple val(meta), path("${prefix}.${suffix}"), emit: outfile + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + suffix = task.ext.suffix ?: 'html' + """ + wget \\ + -O - \\ + $args \\ + $url \\ + > ${prefix}.${suffix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + wget: \$(wget --version | head -1 | cut -d ' ' -f 3) + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + suffix = task.ext.suffix ?: 'html' + """ + touch ${prefix}.${suffix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + wget: \$(wget --version | head -1 | cut -d ' ' -f 3) + END_VERSIONS + """ +} diff --git a/modules/nf-core/wget/meta.yml b/modules/nf-core/wget/meta.yml new file mode 100644 index 0000000..56df0af --- /dev/null +++ b/modules/nf-core/wget/meta.yml @@ -0,0 +1,52 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "wget" +description: The non-interactive network downloader +keywords: + - "wget" + - "download" + - "network" +tools: + - "wget": + description: "wget is a free utility for non-interactive download of files from + the Web." + homepage: "https://www.gnu.org/software/wget/" + documentation: "https://www.gnu.org/software/wget/manual/wget.html" + licence: ["GPL"] + identifier: "" + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1' ]` + - url: + type: string + description: URL to download + pattern: "^https?://*.*" + +output: + outfile: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - ${prefix}.${suffix}: + type: file + description: Downloaded file + pattern: "*.*" + + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@itrujnara" +maintainers: + - "@itrujnara" diff --git a/modules/nf-core/wget/tests/main.nf.test b/modules/nf-core/wget/tests/main.nf.test new file mode 100644 index 0000000..e094288 --- /dev/null +++ b/modules/nf-core/wget/tests/main.nf.test @@ -0,0 +1,62 @@ +// nf-core modules test wget +nextflow_process { + + name "Test Process WGET" + script "../main.nf" + process "WGET" + + tag "modules" + tag "modules_nfcore" + tag "wget" + + test("sarscov2 - gff") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/modules/data/genomics/sarscov2/genome/genome.gff3", + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - gff - stub") { + + options "-stub" + + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/modules/data/genomics/sarscov2/genome/genome.gff3", + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/wget/tests/main.nf.test.snap b/modules/nf-core/wget/tests/main.nf.test.snap new file mode 100644 index 0000000..6c05160 --- /dev/null +++ 
b/modules/nf-core/wget/tests/main.nf.test.snap @@ -0,0 +1,70 @@ +{ + "sarscov2 - gff": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.gff3:md5,357435a81a9981a0128e840ebe11051e" + ] + ], + "1": [ + "versions.yml:md5,a747f72db5fc051f64676a0ba6f32f35" + ], + "outfile": [ + [ + { + "id": "test" + }, + "test.gff3:md5,357435a81a9981a0128e840ebe11051e" + ] + ], + "versions": [ + "versions.yml:md5,a747f72db5fc051f64676a0ba6f32f35" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.4" + }, + "timestamp": "2025-03-26T12:27:32.67617" + }, + "sarscov2 - gff - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.gff3:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,a747f72db5fc051f64676a0ba6f32f35" + ], + "outfile": [ + [ + { + "id": "test", + "single_end": false + }, + "test.gff3:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,a747f72db5fc051f64676a0ba6f32f35" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.4" + }, + "timestamp": "2025-03-26T12:21:06.414955" + } +} \ No newline at end of file diff --git a/modules/nf-core/wget/tests/nextflow.config b/modules/nf-core/wget/tests/nextflow.config new file mode 100644 index 0000000..236f4e1 --- /dev/null +++ b/modules/nf-core/wget/tests/nextflow.config @@ -0,0 +1,6 @@ +process { + withName: "WGET" { + ext.prefix = "test" + ext.suffix = "gff3" + } +} From 694306403b6b501ddeeadb76a8e93956034fb10f Mon Sep 17 00:00:00 2001 From: angelphanth Date: Mon, 30 Mar 2026 16:26:15 +0100 Subject: [PATCH 03/14] fix ARIA2_MROOT and take: alignment --- subworkflows/local/domain_annotation/main.nf | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/subworkflows/local/domain_annotation/main.nf b/subworkflows/local/domain_annotation/main.nf index 1d0054a..afb1569 100644 --- a/subworkflows/local/domain_annotation/main.nf +++ 
b/subworkflows/local/domain_annotation/main.nf @@ -1,7 +1,7 @@ include { ARIA2 as ARIA2_PFAM } from '../../../modules/nf-core/aria2/main' include { ARIA2 as ARIA2_FUNFAM } from '../../../modules/nf-core/aria2/main' include { ARIA2 as ARIA2_NMPFAMS } from '../../../modules/nf-core/aria2/main' -include { ARIA2 as ARIA2_NMPFAMS } from '../../../modules/nf-core/aria2/main' +include { ARIA2 as ARIA2_MROOT } from '../../../modules/nf-core/aria2/main' include { HMMER_HMMSEARCH as HMMSEARCH_PFAM } from '../../../modules/nf-core/hmmer/hmmsearch/main' include { HMMER_HMMSEARCH as HMMSEARCH_FUNFAM } from '../../../modules/nf-core/hmmer/hmmsearch/main' include { HMMER_HMMSEARCH as HMMSEARCH_NMPFAMS } from '../../../modules/nf-core/hmmer/hmmsearch/main' @@ -19,10 +19,10 @@ workflow DOMAIN_ANNOTATION { skip_nmpfams // boolean nmpfams_db // string nmpfams_latest_link // string - skip_mroot // boolean - mroot_db // string, path to the metagroot HMM database, if already exists - mroot_latest_link // string, path to the latest metagroot HMM database, to download - + skip_mroot // boolean + mroot_db // string, path to the metagroot HMM database, if already exists + mroot_latest_link // string, path to the latest metagroot HMM database, to download + main: ch_versions = channel.empty() @@ -90,7 +90,7 @@ workflow DOMAIN_ANNOTATION { ch_versions = ch_versions.mix( HMMSEARCH_NMPFAMS.out.versions.first() ) ch_nmpfams_domains = HMMSEARCH_NMPFAMS.out.domain_summary } - + if (!skip_mroot) { if (!mroot_db) { ch_mroot_link = channel.of([ [ id: 'mroot' ], mroot_latest_link ]) From 1cf0555036da472214fb443c303ce42d382c1ef9 Mon Sep 17 00:00:00 2001 From: angelphanth Date: Mon, 30 Mar 2026 17:15:27 +0100 Subject: [PATCH 04/14] replace 'mroot' with 'metagroot' --- conf/modules.config | 8 +-- conf/test.config | 2 +- conf/test_full.config | 1 + main.nf | 4 +- nextflow.config | 6 +-- nextflow_schema.json | 10 ++-- subworkflows/local/domain_annotation/main.nf | 38 +++++++------- 
subworkflows/local/domain_annotation/meta.yml | 8 +-- .../domain_annotation/tests/main.nf.test | 51 ++++++++++++++++++- .../main.nf | 4 +- workflows/proteinannotator.nf | 42 +++++++-------- 11 files changed, 112 insertions(+), 62 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 5640258..f382849 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -89,7 +89,7 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } - + withName: 'NFCORE_PROTEINANNOTATOR:PROTEINANNOTATOR:DOMAIN_ANNOTATION:ARIA2_NMPFAMS' { publishDir = [ path: { "${params.outdir}/downloaded_dbs/" }, @@ -127,11 +127,11 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } - - withName: 'NFCORE_PROTEINANNOTATOR:PROTEINANNOTATOR:DOMAIN_ANNOTATION:HMMSEARCH_MROOT' { + + withName: 'NFCORE_PROTEINANNOTATOR:PROTEINANNOTATOR:DOMAIN_ANNOTATION:HMMSEARCH_METAGROOT' { ext.args = { "-E ${params.hmmsearch_evalue_cutoff}" } publishDir = [ - path: { "${params.outdir}/domain_annotation/mroot/" }, + path: { "${params.outdir}/domain_annotation/metagroot/" }, mode: params.publish_dir_mode, pattern: "*.domtbl.gz", saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } diff --git a/conf/test.config b/conf/test.config index 5a80c47..e392923 100644 --- a/conf/test.config +++ b/conf/test.config @@ -28,7 +28,7 @@ params { pfam_latest_link = params.pipelines_testdata_base_path + 'proteinannotator/testdata/pfam/Pfam-A_test.hmm.gz' funfam_latest_link = params.pipelines_testdata_base_path + 'proteinannotator/testdata/funfam/funfam-hmm3-v4_3_0_test.lib.gz' nmpfams_latest_link = params.pipelines_testdata_base_path + 'proteinannotator/testdata/nmpfams/nmpfamsdb_test.hmm.gz' - mroot_latest_link = params.pipelines_testdata_base_path + 'proteinannotator/testdata/metagroot/metagrootdb_test.hmm.gz' + metagroot_latest_link = params.pipelines_testdata_base_path + 'proteinannotator/testdata/metagroot/metagrootdb_test.hmm.gz' // Functional annotation interproscan_db_url = params.pipelines_testdata_base_path + 'proteinannotator/testdata/interproscan/interproscan_test.tar.gz' interproscan_applications = 'Hamap,TIGRFAM,sfld' diff --git a/conf/test_full.config b/conf/test_full.config index bcf1d96..4b63915 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -20,6 +20,7 @@ params { pfam_latest_link = params.pipelines_testdata_base_path + 'proteinannotator/testdata/pfam/Pfam-A_test.hmm.gz' funfam_latest_link = params.pipelines_testdata_base_path + 'proteinannotator/testdata/funfam/funfam-hmm3-v4_3_0_test.lib.gz' nmpfams_latest_link = params.pipelines_testdata_base_path + 'proteinannotator/testdata/nmpfams/nmpfamsdb_test.hmm.gz' + metagroot_latest_link = params.pipelines_testdata_base_path + 'proteinannotator/testdata/metagroot/metagrootdb_test.hmm.gz' // Functional annotation interproscan_db_url = params.pipelines_testdata_base_path + 'proteinannotator/testdata/interproscan_test.tar.gz' interproscan_applications = 'Hamap,TIGRFAM,sfld' diff --git a/main.nf b/main.nf index a4e9221..9f7f19b 100644 --- a/main.nf +++ b/main.nf @@ -49,8 +49,8 @@ workflow NFCORE_PROTEINANNOTATOR { params.skip_nmpfams, params.nmpfams_db, 
params.nmpfams_latest_link, - params.mroot_db, - params.mroot_latest_link, + params.metagroot_db, + params.metagroot_latest_link, params.skip_interproscan, params.interproscan_db_url, params.interproscan_db, diff --git a/nextflow.config b/nextflow.config index 99e06a5..af16afb 100644 --- a/nextflow.config +++ b/nextflow.config @@ -28,9 +28,9 @@ params { skip_nmpfams = false nmpfams_db = null nmpfams_latest_link = "https://pavlopoulos-lab.org/envofams/databases/hmmer/nmpfamsdb.hmm.gz" - skip_mroot = false - mroot_db = null - mroot_latest_link = "https://pavlopoulos-lab.org/envofams/databases/hmmer/metagroot.hmm.gz" + skip_metagroot = false + metagroot_db = null + metagroot_latest_link = "https://pavlopoulos-lab.org/envofams/databases/hmmer/metagroot.hmm.gz" hmmsearch_evalue_cutoff = 0.001 // Functional annotation diff --git a/nextflow_schema.json b/nextflow_schema.json index e3e9c53..ffd915d 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -293,22 +293,23 @@ "default": "https://pavlopoulos-lab.org/envofams/databases/hmmer/nmpfamsdb.hmm.gz", "description": "" }, - "skip_mroot": { + "skip_metagroot": { "type": "boolean", "fa_icon": "fas fa-ban", "description": "Skip the domain annotation with the MetagRoot database.", "help": "Skips the domain annotation of input sequence against a MetagRoot database." }, - "mroot_db": { + "metagroot_db": { "type": "string", "format": "file-path", "description": "Path to an already installed MetagRoot HMM database (.hmm.gz).", - "help_text": "If left null and skip_mroot is false, the pipeline will start downloading the latest MetagRoot HMM library." + "help_text": "If left null and skip_metagroot is false, the pipeline will start downloading the latest MetagRoot HMM library." 
}, - "mroot_latest_link": { + "metagroot_latest_link": { "type": "string", "default": "https://pavlopoulos-lab.org/envofams/databases/hmmer/metagroot.hmm.gz", "description": "MetagRoot hosted link to the latest available MetagRoot HMM database file." + }, "hmmsearch_evalue_cutoff": { "type": "number", "default": 0.001, @@ -395,7 +396,6 @@ { "$ref": "#/$defs/domain_annotation_params" }, - { "$ref": "#/$defs/functional_annotation_options" }, diff --git a/subworkflows/local/domain_annotation/main.nf b/subworkflows/local/domain_annotation/main.nf index afb1569..76692f7 100644 --- a/subworkflows/local/domain_annotation/main.nf +++ b/subworkflows/local/domain_annotation/main.nf @@ -1,11 +1,11 @@ include { ARIA2 as ARIA2_PFAM } from '../../../modules/nf-core/aria2/main' include { ARIA2 as ARIA2_FUNFAM } from '../../../modules/nf-core/aria2/main' include { ARIA2 as ARIA2_NMPFAMS } from '../../../modules/nf-core/aria2/main' -include { ARIA2 as ARIA2_MROOT } from '../../../modules/nf-core/aria2/main' +include { ARIA2 as ARIA2_METAGROOT } from '../../../modules/nf-core/aria2/main' include { HMMER_HMMSEARCH as HMMSEARCH_PFAM } from '../../../modules/nf-core/hmmer/hmmsearch/main' include { HMMER_HMMSEARCH as HMMSEARCH_FUNFAM } from '../../../modules/nf-core/hmmer/hmmsearch/main' include { HMMER_HMMSEARCH as HMMSEARCH_NMPFAMS } from '../../../modules/nf-core/hmmer/hmmsearch/main' -include { HMMER_HMMSEARCH as HMMSEARCH_MROOT } from '../../../modules/nf-core/hmmer/hmmsearch/main' +include { HMMER_HMMSEARCH as HMMSEARCH_METAGROOT } from '../../../modules/nf-core/hmmer/hmmsearch/main' workflow DOMAIN_ANNOTATION { take: @@ -19,9 +19,9 @@ workflow DOMAIN_ANNOTATION { skip_nmpfams // boolean nmpfams_db // string nmpfams_latest_link // string - skip_mroot // boolean - mroot_db // string, path to the metagroot HMM database, if already exists - mroot_latest_link // string, path to the latest metagroot HMM database, to download + skip_metagroot // boolean + metagroot_db // string, path to 
the metagroot HMM database, if already exists + metagroot_latest_link // string, path to the latest metagroot HMM database, to download main: @@ -29,7 +29,7 @@ workflow DOMAIN_ANNOTATION { ch_pfam_domains = channel.empty() ch_funfam_domains = channel.empty() ch_nmpfams_domains = channel.empty() - ch_mroot_domains = channel.empty() + ch_metagroot_domains = channel.empty() if (!skip_pfam) { if (!pfam_db) { @@ -91,30 +91,30 @@ workflow DOMAIN_ANNOTATION { ch_nmpfams_domains = HMMSEARCH_NMPFAMS.out.domain_summary } - if (!skip_mroot) { - if (!mroot_db) { - ch_mroot_link = channel.of([ [ id: 'mroot' ], mroot_latest_link ]) + if (!skip_metagroot) { + if (!metagroot_db) { + ch_metagroot_link = channel.of([ [ id: 'metagroot' ], metagroot_latest_link ]) - ARIA2_MROOT( ch_mroot_link ) - ch_versions = ch_versions.mix( ARIA2_MROOT.out.versions ) - ch_mroot_db = ARIA2_MROOT.out.downloaded_file + ARIA2_METAGROOT( ch_metagroot_link ) + ch_versions = ch_versions.mix( ARIA2_METAGROOT.out.versions ) + ch_metagroot_db = ARIA2_METAGROOT.out.downloaded_file } else { - ch_mroot_db = channel.of([ [ id: 'mroot' ], mroot_db ]) + ch_metagroot_db = channel.of([ [ id: 'metagroot' ], metagroot_db ]) } - ch_input_for_hmmsearch_mroot = ch_fasta - .combine(ch_mroot_db) + ch_input_for_hmmsearch_metagroot = ch_fasta + .combine(ch_metagroot_db) .map{ meta, seqs, _meta2, models -> [meta, models, seqs, false, false, true] } - HMMSEARCH_MROOT( ch_input_for_hmmsearch_mroot ) - ch_versions = ch_versions.mix( HMMSEARCH_MROOT.out.versions.first() ) - ch_mroot_domains = HMMSEARCH_MROOT.out.domain_summary + HMMSEARCH_METAGROOT( ch_input_for_hmmsearch_metagroot ) + ch_versions = ch_versions.mix( HMMSEARCH_METAGROOT.out.versions.first() ) + ch_metagroot_domains = HMMSEARCH_METAGROOT.out.domain_summary } emit: pfam_domains = ch_pfam_domains funfam_domains = ch_funfam_domains nmpfams_domains = ch_nmpfams_domains - mroot_domains = ch_mroot_domains + metagroot_domains = ch_metagroot_domains versions = ch_versions 
} diff --git a/subworkflows/local/domain_annotation/meta.yml b/subworkflows/local/domain_annotation/meta.yml index f408ea1..b60228b 100644 --- a/subworkflows/local/domain_annotation/meta.yml +++ b/subworkflows/local/domain_annotation/meta.yml @@ -54,15 +54,15 @@ input: type: string description: | Path to the latest nmpfamsDB HMM database, to download - - skip_mroot: + - skip_metagroot: type: boolean description: | Skip domain annotation with MetagRoot - - mroot_db: + - metagroot_db: type: string description: | Path to an existing HMM MetagRoot library on the system. If provided, the ARIA2_METAGROOT db download will be skipped. - - mroot_latest_link: + - metagroot_latest_link: type: string description: | Path to the latest MetagRoot HMM database, to download @@ -79,7 +79,7 @@ output: type: file description: | domtbl.gz files with nmpfams domain annotation for input amino acid sequences - - mroot_domains: + - metagroot_domains: type: file description: | domtbl.gz files with metagroot domain annotation for input amino acid sequences diff --git a/subworkflows/local/domain_annotation/tests/main.nf.test b/subworkflows/local/domain_annotation/tests/main.nf.test index 18030f4..d96a12f 100644 --- a/subworkflows/local/domain_annotation/tests/main.nf.test +++ b/subworkflows/local/domain_annotation/tests/main.nf.test @@ -22,6 +22,9 @@ nextflow_workflow { input[7] = true // skip_nmpfams input[8] = null // nmpfams_db input[9] = params.pipelines_testdata_base_path + '/testdata/nmpfams/nmpfamsdb_test.hmm.gz' // nmpfams_latest_link + input[10] = true // skip_metagroot + input[11] = null // metagroot_db + input[12] = params.pipelines_testdata_base_path + '/testdata/metagroot/metagroot_test.hmm.gz' // metagroot_latest_link """ } } @@ -56,6 +59,9 @@ nextflow_workflow { input[7] = true // skip_nmpfams input[8] = null // nmpfams_db input[9] = params.pipelines_testdata_base_path + '/testdata/nmpfams/nmpfamsdb_test.hmm.gz' // nmpfams_latest_link + input[10] = true // skip_metagroot + 
input[11] = null // metagroot_db + input[12] = params.pipelines_testdata_base_path + '/testdata/metagroot/metagroot_test.hmm.gz' // metagroot_latest_link """ } } @@ -89,6 +95,9 @@ nextflow_workflow { input[7] = false // skip_nmpfams input[8] = null // nmpfams_db input[9] = params.pipelines_testdata_base_path + '/testdata/nmpfams/nmpfamsdb_test.hmm.gz' // nmpfams_latest_link + input[10] = true // skip_metagroot + input[11] = null // metagroot_db + input[12] = params.pipelines_testdata_base_path + '/testdata/metagroot/metagroot_test.hmm.gz' // metagroot_latest_link """ } } @@ -104,6 +113,43 @@ nextflow_workflow { } } + test("faa - metagroot") { + + when { + workflow { + """ + input[0] = channel.of([ + [ id: 'test' ], + file(params.pipelines_testdata_base_path + '/testdata/sequences/test_proteins.faa', checkIfExists: true) + ]) + input[1] = true // skip_pfam + input[2] = null // pfam_db + input[3] = params.pipelines_testdata_base_path + '/testdata/pfam/Pfam-A_test.hmm.gz' // pfam_latest_link + input[4] = true // skip_funfam + input[5] = null // funfam_db + input[6] = params.pipelines_testdata_base_path + '/testdata/funfam/funfam-hmm3-v4_3_0_test.lib.gz' // funfam_latest_link + input[7] = true // skip_nmpfams + input[8] = null // nmpfams_db + input[9] = params.pipelines_testdata_base_path + '/testdata/nmpfams/nmpfamsdb_test.hmm.gz' // nmpfams_latest_link + input[10] = false // skip_metagroot + input[11] = null // metagroot_db + input[12] = params.pipelines_testdata_base_path + '/testdata/metagroot/metagroot_test.hmm.gz' // metagroot_latest_link + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot( + // pfam, funfam and nmpfams are skipped in this test, so only the metagroot channel has output + path(workflow.out.metagroot_domains[0][1]).linesGzip[0..7], + workflow.out.versions.collect { path(it).yaml }.unique() + ).match()} + ) + } + } + test("faa - domain annotation - stub") { options "-stub" @@ -123,7 +169,10 @@ nextflow_workflow { input[6] = 
params.pipelines_testdata_base_path + '/testdata/funfam/funfam-hmm3-v4_3_0_test.lib.gz' // funfam_latest_link input[7] = false // skip_nmpfams input[8] = null // nmpfams_db - input[9] = params.pipelines_testdata_base_path + '/testdata/nmpfams/nmpfamsdb_test.hmm.gz' // nmpfams_latest_link + input[9] = params.pipelines_testdata_base_path + '/testdata/nmpfams/nmpfamsdb_test.hmm.gz' // nmpfams_latest_link + input[10] = false // skip_metagroot + input[11] = null // metagroot_db + input[12] = params.pipelines_testdata_base_path + '/testdata/metagroot/metagroot_test.hmm.gz' // metagroot_latest_link """ } } diff --git a/subworkflows/local/utils_nfcore_proteinannotator_pipeline/main.nf b/subworkflows/local/utils_nfcore_proteinannotator_pipeline/main.nf index 8a67633..ded790e 100644 --- a/subworkflows/local/utils_nfcore_proteinannotator_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_proteinannotator_pipeline/main.nf @@ -180,7 +180,7 @@ def toolCitationText() { params.skip_preprocessing ? "" : "Input sequences were preprocessed with SeqKit (gap trimming, length filtering, validation, duplicate removal) (Shen et al. 2024)." ].join(' ').trim() - def domain_annotation_text = (params.skip_pfam && params.skip_funfam && params.skip_nmpfams && params.skip_mroot) ? "" : "Domains were annotated with hmmer/hmmsearch (Eddy et al. 2011)." + def domain_annotation_text = (params.skip_pfam && params.skip_funfam && params.skip_nmpfams && params.skip_metagroot) ? "" : "Domains were annotated with hmmer/hmmsearch (Eddy et al. 2011)." def prediction_text = params.skip_s4pred ? "" : "Secondary structures were predicted via the s4pred software (Moffat et al. 2021)." @@ -202,7 +202,7 @@ def toolBibliographyText() { params.skip_preprocessing ? '' : '
  • Shen, W., Sipos, B., & Zhao, L. (2024). SeqKit2: A Swiss army knife for sequence and alignment processing. Imeta, 3(3), e191. doi: 10.1002/imt2.191
  • ' ].join(' ').trim() - def domain_annotation_text = (params.skip_pfam && params.skip_funfam && params.skip_nmpfams && params.skip_mroot) ? '' : '
  • Eddy, S. R. (2011). Accelerated profile HMM searches. PLoS computational biology, 7(10), e1002195. doi: 10.1371/journal.pcbi.1002195
  • ' + def domain_annotation_text = (params.skip_pfam && params.skip_funfam && params.skip_nmpfams && params.skip_metagroot) ? '' : '
  • Eddy, S. R. (2011). Accelerated profile HMM searches. PLoS computational biology, 7(10), e1002195. doi: 10.1371/journal.pcbi.1002195
  • ' def prediction_text = params.skip_s4pred ? '' : '
  • Moffat, L., & Jones, D. T. (2021). Increasing the accuracy of single sequence prediction methods using a deep semi-supervised learning framework. Bioinformatics, 37(21), 3744-3751. doi: 10.1093/bioinformatics/btab491
  • ' diff --git a/workflows/proteinannotator.nf b/workflows/proteinannotator.nf index 55aa8fe..a0d99fa 100644 --- a/workflows/proteinannotator.nf +++ b/workflows/proteinannotator.nf @@ -21,24 +21,24 @@ include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_prot workflow PROTEINANNOTATOR { take: - ch_samplesheet // channel: samplesheet read in from --input - skip_preprocessing // boolean - skip_pfam // boolean - pfam_db // string, path to the pfam HMM database, if already exists - pfam_latest_link // string, path to the latest pfam HMM database, to download - skip_funfam // boolean - funfam_db // string, path to the pfam HMM database, if already exists - funfam_latest_link // string, path to the latest pfam HMM database, to download - skip_nmpfams // boolean - nmpfams_db // string - nmpfams_latest_link // string - skip_mroot // boolean - mroot_db // string, path to the metagroot HMM database, if already exists - mroot_latest_link // string, path to the latest metagroot HMM database, to download - skip_interproscan // boolean - interproscan_db_url // string, url to download db - interproscan_db // string, existing db - skip_s4pred // boolean + ch_samplesheet // channel: samplesheet read in from --input + skip_preprocessing // boolean + skip_pfam // boolean + pfam_db // string, path to the pfam HMM database, if already exists + pfam_latest_link // string, path to the latest pfam HMM database, to download + skip_funfam // boolean + funfam_db // string, path to the pfam HMM database, if already exists + funfam_latest_link // string, path to the latest pfam HMM database, to download + skip_nmpfams // boolean + nmpfams_db // string + nmpfams_latest_link // string + skip_metagroot // boolean + metagroot_db // string, path to the metagroot HMM database, if already exists + metagroot_latest_link // string, path to the latest metagroot HMM database, to download + skip_interproscan // boolean + interproscan_db_url // string, url to download db + 
interproscan_db // string, existing db + skip_s4pred // boolean main: @@ -59,9 +59,9 @@ workflow PROTEINANNOTATOR { skip_nmpfams, nmpfams_db, nmpfams_latest_link, - skip_mroot, - mroot_db, - mroot_latest_link + skip_metagroot, + metagroot_db, + metagroot_latest_link ) ch_versions = ch_versions.mix( DOMAIN_ANNOTATION.out.versions ) From f1372263298f214d4e089ce9b066232bbe3bd388 Mon Sep 17 00:00:00 2001 From: angelphanth Date: Mon, 30 Mar 2026 17:26:42 +0100 Subject: [PATCH 05/14] align = and comments --- conf/modules.config | 8 ++++ nextflow.config | 6 +-- subworkflows/local/domain_annotation/main.nf | 40 ++++++++++---------- 3 files changed, 31 insertions(+), 23 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index f382849..b325242 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -98,6 +98,14 @@ process { ] } + withName: 'NFCORE_PROTEINANNOTATOR:PROTEINANNOTATOR:DOMAIN_ANNOTATION:ARIA2_METAGROOT' { + publishDir = [ + path: { "${params.outdir}/downloaded_dbs/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + withName: 'NFCORE_PROTEINANNOTATOR:PROTEINANNOTATOR:DOMAIN_ANNOTATION:HMMSEARCH_PFAM' { ext.args = { "-E ${params.hmmsearch_evalue_cutoff}" } publishDir = [ diff --git a/nextflow.config b/nextflow.config index af16afb..b96d124 100644 --- a/nextflow.config +++ b/nextflow.config @@ -28,9 +28,9 @@ params { skip_nmpfams = false nmpfams_db = null nmpfams_latest_link = "https://pavlopoulos-lab.org/envofams/databases/hmmer/nmpfamsdb.hmm.gz" - skip_metagroot = false - metagroot_db = null - metagroot_latest_link = "https://pavlopoulos-lab.org/envofams/databases/hmmer/metagroot.hmm.gz" + skip_metagroot = false + metagroot_db = null + metagroot_latest_link = "https://pavlopoulos-lab.org/envofams/databases/hmmer/metagroot.hmm.gz" hmmsearch_evalue_cutoff = 0.001 // Functional annotation diff --git a/subworkflows/local/domain_annotation/main.nf b/subworkflows/local/domain_annotation/main.nf index 76692f7..456d620 100644 --- a/subworkflows/local/domain_annotation/main.nf +++ b/subworkflows/local/domain_annotation/main.nf @@ -1,11 +1,11 @@ -include { ARIA2 as ARIA2_PFAM } from '../../../modules/nf-core/aria2/main' -include { ARIA2 as ARIA2_FUNFAM } from '../../../modules/nf-core/aria2/main' -include { ARIA2 as ARIA2_NMPFAMS } from '../../../modules/nf-core/aria2/main' -include { ARIA2 as ARIA2_METAGROOT } from '../../../modules/nf-core/aria2/main' -include { HMMER_HMMSEARCH as HMMSEARCH_PFAM } from '../../../modules/nf-core/hmmer/hmmsearch/main' -include { HMMER_HMMSEARCH as HMMSEARCH_FUNFAM } from '../../../modules/nf-core/hmmer/hmmsearch/main' -include { HMMER_HMMSEARCH as HMMSEARCH_NMPFAMS } from '../../../modules/nf-core/hmmer/hmmsearch/main' -include { HMMER_HMMSEARCH as HMMSEARCH_METAGROOT } from '../../../modules/nf-core/hmmer/hmmsearch/main' +include { ARIA2 as ARIA2_PFAM } from '../../../modules/nf-core/aria2/main' +include { ARIA2 as ARIA2_FUNFAM } from '../../../modules/nf-core/aria2/main' +include { ARIA2 as ARIA2_NMPFAMS } from 
'../../../modules/nf-core/aria2/main' +include { ARIA2 as ARIA2_METAGROOT } from '../../../modules/nf-core/aria2/main' +include { HMMER_HMMSEARCH as HMMSEARCH_PFAM } from '../../../modules/nf-core/hmmer/hmmsearch/main' +include { HMMER_HMMSEARCH as HMMSEARCH_FUNFAM } from '../../../modules/nf-core/hmmer/hmmsearch/main' +include { HMMER_HMMSEARCH as HMMSEARCH_NMPFAMS } from '../../../modules/nf-core/hmmer/hmmsearch/main' +include { HMMER_HMMSEARCH as HMMSEARCH_METAGROOT } from '../../../modules/nf-core/hmmer/hmmsearch/main' workflow DOMAIN_ANNOTATION { take: @@ -19,17 +19,17 @@ workflow DOMAIN_ANNOTATION { skip_nmpfams // boolean nmpfams_db // string nmpfams_latest_link // string - skip_metagroot // boolean - metagroot_db // string, path to the metagroot HMM database, if already exists - metagroot_latest_link // string, path to the latest metagroot HMM database, to download + skip_metagroot // boolean + metagroot_db // string, path to the metagroot HMM database, if already exists + metagroot_latest_link // string, path to the latest metagroot HMM database, to download main: - ch_versions = channel.empty() - ch_pfam_domains = channel.empty() - ch_funfam_domains = channel.empty() - ch_nmpfams_domains = channel.empty() - ch_metagroot_domains = channel.empty() + ch_versions = channel.empty() + ch_pfam_domains = channel.empty() + ch_funfam_domains = channel.empty() + ch_nmpfams_domains = channel.empty() + ch_metagroot_domains = channel.empty() if (!skip_pfam) { if (!pfam_db) { @@ -112,9 +112,9 @@ workflow DOMAIN_ANNOTATION { } emit: - pfam_domains = ch_pfam_domains - funfam_domains = ch_funfam_domains - nmpfams_domains = ch_nmpfams_domains + pfam_domains = ch_pfam_domains + funfam_domains = ch_funfam_domains + nmpfams_domains = ch_nmpfams_domains metagroot_domains = ch_metagroot_domains - versions = ch_versions + versions = ch_versions } From a06b808d79aedca7f20447fbcbc645f964d6fd72 Mon Sep 17 00:00:00 2001 From: angelphanth Date: Mon, 30 Mar 2026 17:54:07 +0100 Subject: 
[PATCH 06/14] remove nf-core/module wget --- modules.json | 5 -- modules/nf-core/wget/environment.yml | 7 -- modules/nf-core/wget/main.nf | 48 -------------- modules/nf-core/wget/meta.yml | 52 --------------- modules/nf-core/wget/tests/main.nf.test | 62 ----------------- modules/nf-core/wget/tests/main.nf.test.snap | 70 -------------------- modules/nf-core/wget/tests/nextflow.config | 6 -- 7 files changed, 250 deletions(-) delete mode 100644 modules/nf-core/wget/environment.yml delete mode 100644 modules/nf-core/wget/main.nf delete mode 100644 modules/nf-core/wget/meta.yml delete mode 100644 modules/nf-core/wget/tests/main.nf.test delete mode 100644 modules/nf-core/wget/tests/main.nf.test.snap delete mode 100644 modules/nf-core/wget/tests/nextflow.config diff --git a/modules.json b/modules.json index 6a31b58..37ba5b8 100644 --- a/modules.json +++ b/modules.json @@ -59,11 +59,6 @@ "branch": "master", "git_sha": "447f7bc0fa41dfc2400c8cad4c0291880dc060cf", "installed_by": ["modules"] - }, - "wget": { - "branch": "master", - "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46", - "installed_by": ["modules"] } } }, diff --git a/modules/nf-core/wget/environment.yml b/modules/nf-core/wget/environment.yml deleted file mode 100644 index 9eb304e..0000000 --- a/modules/nf-core/wget/environment.yml +++ /dev/null @@ -1,7 +0,0 @@ ---- -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json -channels: - - conda-forge - - bioconda -dependencies: - - conda-forge::wget=1.21.4 diff --git a/modules/nf-core/wget/main.nf b/modules/nf-core/wget/main.nf deleted file mode 100644 index 9bc6f15..0000000 --- a/modules/nf-core/wget/main.nf +++ /dev/null @@ -1,48 +0,0 @@ -process WGET { - tag "$meta.id" - label 'process_single' - - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/3b/3b54fa9135194c72a18d00db6b399c03248103f87e43ca75e4b50d61179994b3/data': - 'community.wave.seqera.io/library/wget:1.21.4--8b0fcde81c17be5e' }" - - input: - tuple val(meta), val(url) - - output: - tuple val(meta), path("${prefix}.${suffix}"), emit: outfile - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - prefix = task.ext.prefix ?: "${meta.id}" - suffix = task.ext.suffix ?: 'html' - """ - wget \\ - -O - \\ - $args \\ - $url \\ - > ${prefix}.${suffix} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - wget: \$(wget --version | head -1 | cut -d ' ' -f 3) - END_VERSIONS - """ - - stub: - prefix = task.ext.prefix ?: "${meta.id}" - suffix = task.ext.suffix ?: 'html' - """ - touch ${prefix}.${suffix} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - wget: \$(wget --version | head -1 | cut -d ' ' -f 3) - END_VERSIONS - """ -} diff --git a/modules/nf-core/wget/meta.yml b/modules/nf-core/wget/meta.yml deleted file mode 100644 index 56df0af..0000000 --- a/modules/nf-core/wget/meta.yml +++ /dev/null @@ -1,52 +0,0 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json -name: "wget" -description: The non-interactive network downloader -keywords: - - "wget" - - "download" - - "network" -tools: - - "wget": - description: "wget is a free utility for non-interactive download of files from - the Web." - homepage: "https://www.gnu.org/software/wget/" - documentation: "https://www.gnu.org/software/wget/manual/wget.html" - licence: ["GPL"] - identifier: "" - -input: - - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
`[ id:'sample1' ]` - - url: - type: string - description: URL to download - pattern: "^https?://*.*" - -output: - outfile: - - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'sample1' ]` - - ${prefix}.${suffix}: - type: file - description: Downloaded file - pattern: "*.*" - - ontologies: [] - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" - - ontologies: - - edam: http://edamontology.org/format_3750 # YAML -authors: - - "@itrujnara" -maintainers: - - "@itrujnara" diff --git a/modules/nf-core/wget/tests/main.nf.test b/modules/nf-core/wget/tests/main.nf.test deleted file mode 100644 index e094288..0000000 --- a/modules/nf-core/wget/tests/main.nf.test +++ /dev/null @@ -1,62 +0,0 @@ -// nf-core modules test wget -nextflow_process { - - name "Test Process WGET" - script "../main.nf" - process "WGET" - - tag "modules" - tag "modules_nfcore" - tag "wget" - - test("sarscov2 - gff") { - - config "./nextflow.config" - - when { - process { - """ - input[0] = [ - [ id:'test' ], // meta map - "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/modules/data/genomics/sarscov2/genome/genome.gff3", - ] - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - - } - - test("sarscov2 - gff - stub") { - - options "-stub" - - config "./nextflow.config" - - when { - process { - """ - input[0] = [ - [ id:'test', single_end:false ], // meta map - "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/modules/data/genomics/sarscov2/genome/genome.gff3", - ] - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - - } - -} diff --git a/modules/nf-core/wget/tests/main.nf.test.snap b/modules/nf-core/wget/tests/main.nf.test.snap deleted file mode 100644 index 6c05160..0000000 --- a/modules/nf-core/wget/tests/main.nf.test.snap 
+++ /dev/null @@ -1,70 +0,0 @@ -{ - "sarscov2 - gff": { - "content": [ - { - "0": [ - [ - { - "id": "test" - }, - "test.gff3:md5,357435a81a9981a0128e840ebe11051e" - ] - ], - "1": [ - "versions.yml:md5,a747f72db5fc051f64676a0ba6f32f35" - ], - "outfile": [ - [ - { - "id": "test" - }, - "test.gff3:md5,357435a81a9981a0128e840ebe11051e" - ] - ], - "versions": [ - "versions.yml:md5,a747f72db5fc051f64676a0ba6f32f35" - ] - } - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.10.4" - }, - "timestamp": "2025-03-26T12:27:32.67617" - }, - "sarscov2 - gff - stub": { - "content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": false - }, - "test.gff3:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "1": [ - "versions.yml:md5,a747f72db5fc051f64676a0ba6f32f35" - ], - "outfile": [ - [ - { - "id": "test", - "single_end": false - }, - "test.gff3:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "versions": [ - "versions.yml:md5,a747f72db5fc051f64676a0ba6f32f35" - ] - } - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.10.4" - }, - "timestamp": "2025-03-26T12:21:06.414955" - } -} \ No newline at end of file diff --git a/modules/nf-core/wget/tests/nextflow.config b/modules/nf-core/wget/tests/nextflow.config deleted file mode 100644 index 236f4e1..0000000 --- a/modules/nf-core/wget/tests/nextflow.config +++ /dev/null @@ -1,6 +0,0 @@ -process { - withName: "WGET" { - ext.prefix = "test" - ext.suffix = "gff3" - } -} From 35c56e9b81ad20952527e3eb5872db1f5d4f83bd Mon Sep 17 00:00:00 2001 From: angelphanth Date: Mon, 30 Mar 2026 17:54:53 +0100 Subject: [PATCH 07/14] add MetagRoot to docs --- CHANGELOG.md | 1 + README.md | 2 +- docs/output.md | 11 +++++++---- docs/usage.md | 2 +- 4 files changed, 10 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 202bd64..69eb988 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` +- 
[#90](https://github.com/nf-core/proteinannotator/pull/90) - Added the option to download and use the latest `MetagRoot` HMM library (or use path to an existing one) for domain annotation. (by @angelphanth) - [#87](https://github.com/nf-core/proteinannotator/pull/87) - Added the option to download and use the latest `NMPFams` HMM library (or use path to an existing one) for domain annotation. (by @npechl) - [#85](https://github.com/nf-core/proteinannotator/pull/85) - Added zenodo doi in `nextflow.config`. (by @vagkaratzas) diff --git a/README.md b/README.md index 2b8f037..f128e0a 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,7 @@ Generate input amino acid sequence statistics with ([`SeqFu`](https://github.com ### Annotate sequences 1. Conserved domain annotation with ([`hmmer`](https://github.com/EddyRivasLab/hmmer/)) against databases - such as [Pfam](https://ftp.ebi.ac.uk/pub/databases/Pfam/), [FunFam](https://download.cathdb.info/cath/releases/all-releases/), and [NMPFams](https://pavlopoulos-lab.org/envofams/databases/hmmer/) + such as [Pfam](https://ftp.ebi.ac.uk/pub/databases/Pfam/), [FunFam](https://download.cathdb.info/cath/releases/all-releases/), and [NMPFams and MetagRoot](https://pavlopoulos-lab.org/envofams/databases/hmmer/) 2. Functional annotation: - ([`InterProScan`](https://interproscan-docs.readthedocs.io/en/v5/)) a software tool used to analyze protein sequences by scanning them against the signatures of protein families, domains, and sites in the [InterPro](https://www.ebi.ac.uk/interpro/) database, helping to identify their functional characteristics. 3. 
Predict secondary structure compositional features such as α-helices, β-strands and coils with ([`s4pred`](https://github.com/psipred/s4pred)) diff --git a/docs/output.md b/docs/output.md index 0e6387f..3a8e1ec 100644 --- a/docs/output.md +++ b/docs/output.md @@ -14,9 +14,9 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d - [SeqFu](#seqfu) for input amino acid sequences quality control (QC) - [SeqKit](#seqkit) for preprocessing input amino acid sequences (i.e., gap removal, convert to upper case, validate, filter by length, replace special characters such as `/`, and remove duplicate sequences) - [Database download](#database-download) Optionally download selected databases for annotation. - - [aria2](#aria2) - To optionally download the Pfam, FunFam, NMPFams and/or InterProScan databases through the pipeline. + - [aria2](#aria2) - To optionally download the Pfam, FunFam, NMPFams, MetagRoot and/or InterProScan databases through the pipeline. - [Domain annotation](#domain-annotation) Annotate proteins with domains from established repositories. - - [hmmer](#hmmer) - To optionally match the input sequence to known Pfam, FunFam and/or NMPFams domains through `hmmer/hmmsearch` + - [hmmer](#hmmer) - To optionally match the input sequence to known Pfam, FunFam, NMPFams and/or MetagRoot domains through `hmmer/hmmsearch` - [Functional annotation](#functional-annotation) Annotate proteins with functional domains - [InterProScan](#Interproscan) - Search the InterProScan database for functional domains - [s4pred](#s4pred) - Predict secondary structures of sequences, producing amino acid level probabilities of forming an α-helix, a β-strand or a coil. 
@@ -73,10 +73,11 @@ The `seqkit` module is used for initial preprocessing (i.e., gap removal, conver - `interproscan_test.tar.gz`: (optional) the downloaded InterProScan archive of member databases according to the optional user-provided url - `funfam-hmm3-v4_3_0*.lib.gz`: (optional) The latest (v4_3_0) full, or a minimal test, FunFam HMM database that can be downloaded through the pipeline. - `nmpfamsdb.hmm.gz`: (optional) The latest full, or a minimal test, NMPFams HMM database that can be downloaded through the pipeline. + - `metagroot.hmm.gz`: (optional) The latest full, or a minimal test, MetagRoot HMM database that can be downloaded through the pipeline. -If the `skip_*` flags (e.g., `skip_pfam`, `skip_funfam`, `skip_nmpfams`, `skip_interproscan`) for each annotation database is set to `true`, or the `*_db` parameter paths (e.g., `pfam_db`, `funfam_db`, `nmpfams_db`, `interproscan_db`) are set (i.e., not `null`), or the run is resumed after a successful database download, then the respective database will not be (re)downloaded. The full database links can be found in the main `nextflow.config` file, while minimal test versions can be found in the `test` and `test_full` profiles (i.e., `conf/test.config`, `conf/test_full.config`). +If the `skip_*` flags (e.g., `skip_pfam`, `skip_funfam`, `skip_nmpfams`, `skip_metagroot`, `skip_interproscan`) for each annotation database is set to `true`, or the `*_db` parameter paths (e.g., `pfam_db`, `funfam_db`, `nmpfams_db`, `metagroot_db`, `interproscan_db`) are set (i.e., not `null`), or the run is resumed after a successful database download, then the respective database will not be (re)downloaded. The full database links can be found in the main `nextflow.config` file, while minimal test versions can be found in the `test` and `test_full` profiles (i.e., `conf/test.config`, `conf/test_full.config`). 
[aria2](https://github.com/aria2/aria2/) is a lightweight multi-protocol & multi-source, cross platform download utility operated in command-line. It supports HTTP/HTTPS, FTP, SFTP, BitTorrent and Metalink. @@ -94,10 +95,12 @@ If the `skip_*` flags (e.g., `skip_pfam`, `skip_funfam`, `skip_nmpfams`, `skip_i - `.domtbl.gz`: `hmmer/hmmsearch` results along parameters info. - `nmpfams/` - `.domtbl.gz`: `hmmer/hmmsearch` results along parameters info. + - `metagroot/` + - `.domtbl.gz`: `hmmer/hmmsearch` results along parameters info. -Each of the `domain_annotation/` subfolders (e.g., `pfam`, `funfam`, `nmpfams`) contain a `.domtbl.gz` annotation file per input sample, depending on which domain annotation databases were used in the pipeline execution. +Each of the `domain_annotation/` subfolders (e.g., `pfam`, `funfam`, `nmpfams`, `metagroot`) contain a `.domtbl.gz` annotation file per input sample, depending on which domain annotation databases were used in the pipeline execution. [hmmer](https://github.com/EddyRivasLab/hmmer) is a fast and flexible alignment trimming tool that keeps phylogenetically informative sites and removes others. diff --git a/docs/usage.md b/docs/usage.md index 72d53cc..a2c17cc 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -7,7 +7,7 @@ ## Introduction **nf-core/proteinannotator** is a bioinformatics pipeline that computes statistics and generates sequence-level annotations for amino acid sequences. -It takes a protein FASTA file as input and performs conserved domain annotation (using Pfam, FunFam and NMPFams HMM databases), functional annotation (using InterProScan), and secondary structure prediction (using s4pred). +It takes a protein FASTA file as input and performs conserved domain annotation (using Pfam, FunFam, NMPFams and MetagRoot HMM databases), functional annotation (using InterProScan), and secondary structure prediction (using s4pred). 
Optionally, paths to pre-downloaded databases can be provided to skip the automatic download steps and speed up repeated runs. ## Samplesheet input From 38d2c48c54ced72e0bb756f5cb72690fe521922c Mon Sep 17 00:00:00 2001 From: angelphanth Date: Mon, 30 Mar 2026 17:55:20 +0100 Subject: [PATCH 08/14] align = --- conf/test.config | 6 +++--- conf/test_full.config | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/conf/test.config b/conf/test.config index e392923..27bda92 100644 --- a/conf/test.config +++ b/conf/test.config @@ -25,9 +25,9 @@ params { // Input data input = params.pipelines_testdata_base_path + 'proteinannotator/samplesheet/samplesheet.csv' // Domain annotation - pfam_latest_link = params.pipelines_testdata_base_path + 'proteinannotator/testdata/pfam/Pfam-A_test.hmm.gz' - funfam_latest_link = params.pipelines_testdata_base_path + 'proteinannotator/testdata/funfam/funfam-hmm3-v4_3_0_test.lib.gz' - nmpfams_latest_link = params.pipelines_testdata_base_path + 'proteinannotator/testdata/nmpfams/nmpfamsdb_test.hmm.gz' + pfam_latest_link = params.pipelines_testdata_base_path + 'proteinannotator/testdata/pfam/Pfam-A_test.hmm.gz' + funfam_latest_link = params.pipelines_testdata_base_path + 'proteinannotator/testdata/funfam/funfam-hmm3-v4_3_0_test.lib.gz' + nmpfams_latest_link = params.pipelines_testdata_base_path + 'proteinannotator/testdata/nmpfams/nmpfamsdb_test.hmm.gz' metagroot_latest_link = params.pipelines_testdata_base_path + 'proteinannotator/testdata/metagroot/metagrootdb_test.hmm.gz' // Functional annotation interproscan_db_url = params.pipelines_testdata_base_path + 'proteinannotator/testdata/interproscan/interproscan_test.tar.gz' diff --git a/conf/test_full.config b/conf/test_full.config index 4b63915..02e244f 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -17,9 +17,9 @@ params { // Input data for full size test input = params.pipelines_testdata_base_path + 'proteinannotator/samplesheet/samplesheet.csv' // Domain 
annotation - pfam_latest_link = params.pipelines_testdata_base_path + 'proteinannotator/testdata/pfam/Pfam-A_test.hmm.gz' - funfam_latest_link = params.pipelines_testdata_base_path + 'proteinannotator/testdata/funfam/funfam-hmm3-v4_3_0_test.lib.gz' - nmpfams_latest_link = params.pipelines_testdata_base_path + 'proteinannotator/testdata/nmpfams/nmpfamsdb_test.hmm.gz' + pfam_latest_link = params.pipelines_testdata_base_path + 'proteinannotator/testdata/pfam/Pfam-A_test.hmm.gz' + funfam_latest_link = params.pipelines_testdata_base_path + 'proteinannotator/testdata/funfam/funfam-hmm3-v4_3_0_test.lib.gz' + nmpfams_latest_link = params.pipelines_testdata_base_path + 'proteinannotator/testdata/nmpfams/nmpfamsdb_test.hmm.gz' metagroot_latest_link = params.pipelines_testdata_base_path + 'proteinannotator/testdata/metagroot/metagrootdb_test.hmm.gz' // Functional annotation interproscan_db_url = params.pipelines_testdata_base_path + 'proteinannotator/testdata/interproscan_test.tar.gz' From 6a539e95aa9663d7320c7ba513e3cb666254d896 Mon Sep 17 00:00:00 2001 From: angelphanth Date: Mon, 30 Mar 2026 17:56:00 +0100 Subject: [PATCH 09/14] update snapshot for domain_annotation tests --- .../domain_annotation/tests/main.nf.test | 3 +- .../domain_annotation/tests/main.nf.test.snap | 55 ++++++++++++++++++- tests/.nftignore | 3 + 3 files changed, 57 insertions(+), 4 deletions(-) diff --git a/subworkflows/local/domain_annotation/tests/main.nf.test b/subworkflows/local/domain_annotation/tests/main.nf.test index d96a12f..0dc2598 100644 --- a/subworkflows/local/domain_annotation/tests/main.nf.test +++ b/subworkflows/local/domain_annotation/tests/main.nf.test @@ -142,8 +142,7 @@ nextflow_workflow { assertAll( { assert workflow.success}, { assert snapshot( - path(workflow.out.pfam_domains[0][1]).linesGzip[0..7], - path(workflow.out.funfam_domains[0][1]).linesGzip[0..7], + path(workflow.out.metagroot_domains[0][1]).linesGzip[0..7], workflow.out.versions.collect { path(it).yaml }.unique() 
).match()} ) diff --git a/subworkflows/local/domain_annotation/tests/main.nf.test.snap b/subworkflows/local/domain_annotation/tests/main.nf.test.snap index 80ce69a..fd4f74e 100644 --- a/subworkflows/local/domain_annotation/tests/main.nf.test.snap +++ b/subworkflows/local/domain_annotation/tests/main.nf.test.snap @@ -1,4 +1,35 @@ { + "faa - metagroot": { + "content": [ + [ + "# --- full sequence --- -------------- this domain ------------- hmm coord ali coord env coord", + "# target name accession tlen query name accession qlen E-value score bias # of c-Evalue i-Evalue score bias from to from to from to acc description of target", + "#------------------- ---------- ----- -------------------- ---------- ----- --------- ------ ----- --- --- --------- --------- ------ ----- ----- ----- ----- ----- ----- ----- ---- ---------------------", + "T1024 - 408 F101326 - 425 9.3e-13 34.9 26.2 1 1 1.8e-12 3.6e-12 33.0 26.2 13 351 18 340 12 407 0.74 LmrP, , 408 residues|", + "T1024 - 408 F226054 - 421 1.3e-13 37.4 26.4 1 1 8.6e-14 1.7e-13 37.0 26.4 2 404 2 404 1 408 0.73 LmrP, , 408 residues|", + "T1024 - 408 F240027 - 384 8.4e-10 25.0 5.2 1 1 8e-10 1.6e-09 24.1 5.2 26 163 26 160 6 178 0.88 LmrP, , 408 residues|", + "T1024 - 408 F287588 - 413 2e-10 26.9 23.3 1 1 1.6e-10 3.1e-10 26.3 23.3 48 363 42 370 30 406 0.74 LmrP, , 408 residues|", + "T1024 - 408 F294204 - 387 3.8e-06 12.8 25.9 1 1 2.8e-06 5.6e-06 12.3 25.9 16 372 41 406 30 408 0.76 LmrP, , 408 residues|" + ], + [ + { + "DOMAIN_ANNOTATION:HMMSEARCH_METAGROOT": { + "hmmer": 3.4 + } + }, + { + "DOMAIN_ANNOTATION:ARIA2_METAGROOT": { + "aria2": "1.36.0" + } + } + ] + ], + "timestamp": "2026-03-30T17:28:28.71093", + "meta": { + "nf-test": "0.9.4", + "nextflow": "25.10.4" + } + }, "faa - domain annotation": { "content": [ [ @@ -44,7 +75,7 @@ } ] ], - "timestamp": "2026-03-13T14:51:37.636657", + "timestamp": "2026-03-30T17:28:01.729059", "meta": { "nf-test": "0.9.4", "nextflow": "25.10.4" @@ -135,9 +166,19 @@ ] ], "3": [ + [ + { + 
"id": "test" + }, + "test.domtbl.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + "versions.yml:md5,046e5161e3e1dff2ab111ddf4bb27331", "versions.yml:md5,160d4c5a5001cfb4ff57b94fc52b67d9", "versions.yml:md5,1b7d208e42364fb87160693faa4e83b9", "versions.yml:md5,35e41735706132967dd94bb636833a4a", + "versions.yml:md5,55939a7ab71dab922d448cf99472feeb", "versions.yml:md5,9045f482d64e7666e62932b0578b665e", "versions.yml:md5,a74a0c8fcb741e59bc14424f612b8d09", "versions.yml:md5,f1d8a406d3dcb97a7c15e9c810926de1" @@ -150,6 +191,14 @@ "test.domtbl.gz:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], + "metagroot_domains": [ + [ + { + "id": "test" + }, + "test.domtbl.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], "nmpfams_domains": [ [ { @@ -167,16 +216,18 @@ ] ], "versions": [ + "versions.yml:md5,046e5161e3e1dff2ab111ddf4bb27331", "versions.yml:md5,160d4c5a5001cfb4ff57b94fc52b67d9", "versions.yml:md5,1b7d208e42364fb87160693faa4e83b9", "versions.yml:md5,35e41735706132967dd94bb636833a4a", + "versions.yml:md5,55939a7ab71dab922d448cf99472feeb", "versions.yml:md5,9045f482d64e7666e62932b0578b665e", "versions.yml:md5,a74a0c8fcb741e59bc14424f612b8d09", "versions.yml:md5,f1d8a406d3dcb97a7c15e9c810926de1" ] } ], - "timestamp": "2026-03-13T09:45:07.520815", + "timestamp": "2026-03-30T17:28:37.677345", "meta": { "nf-test": "0.9.4", "nextflow": "25.10.4" diff --git a/tests/.nftignore b/tests/.nftignore index 0b6bd76..6441f81 100644 --- a/tests/.nftignore +++ b/tests/.nftignore @@ -17,6 +17,9 @@ domain_annotation/funfam/l_arginase.domtbl.gz domain_annotation/nmpfams/T1024.domtbl.gz domain_annotation/nmpfams/T1026.domtbl.gz domain_annotation/nmpfams/l_arginase.domtbl.gz +domain_annotation/metagroot/T1024.domtbl.gz +domain_annotation/metagroot/T1026.domtbl.gz +domain_annotation/metagroot/l_arginase.domtbl.gz functional_annotation/interproscan/T1024/T1024.gff3 functional_annotation/interproscan/T1024/T1024.tsv functional_annotation/interproscan/T1026/T1026.gff3 From 
7f8269bb682dfc803d4243162888900b53f40cf4 Mon Sep 17 00:00:00 2001 From: angelphanth Date: Mon, 30 Mar 2026 18:00:05 +0100 Subject: [PATCH 10/14] updated meta via nf-core pipelines lint --- ro-crate-metadata.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ro-crate-metadata.json b/ro-crate-metadata.json index 5028582..6cb8579 100644 --- a/ro-crate-metadata.json +++ b/ro-crate-metadata.json @@ -23,7 +23,7 @@ "@type": "Dataset", "creativeWorkStatus": "InProgress", "datePublished": "2026-02-09T13:54:13+00:00", - "description": "

    \n \n \n \"nf-core/proteinannotator\"\n \n

    \n\n[![Open in GitHub Codespaces](https://img.shields.io/badge/Open_In_GitHub_Codespaces-black?labelColor=grey&logo=github)](https://github.com/codespaces/new/nf-core/proteinannotator)\n[![GitHub Actions CI Status](https://github.com/nf-core/proteinannotator/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/proteinannotator/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/proteinannotator/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/proteinannotator/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/proteinannotator/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.18547735-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.18547735)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.10.0-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.5.2-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.5.2)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera 
Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/proteinannotator)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23proteinannotator-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/proteinannotator)[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/proteinannotator** is a bioinformatics pipeline that computes statistics for protein FASTA inputs and produces protein annotations based on predicted sequence features, including conserved domains, functions, and secondary structure.\n\n

    \n \n \n \"nf-core/proteinannotator\"\n \n

    \n\n### Check quality and pre-process\n\nGenerate input amino acid sequence statistics with ([`SeqFu`](https://github.com/telatin/seqfu2/)) and pre-process them (i.e., gap removal, convert to upper case, validate, filter by length, replace special characters such as `/`, and remove duplicate sequences) with ([`SeqKit`](https://github.com/shenwei356/seqkit/))\n\n### Annotate sequences\n\n1. Conserved domain annotation with ([`hmmer`](https://github.com/EddyRivasLab/hmmer/)) against databases\n such as [Pfam](https://ftp.ebi.ac.uk/pub/databases/Pfam/), [FunFam](https://download.cathdb.info/cath/releases/all-releases/), and [NMPFams](https://pavlopoulos-lab.org/envofams/databases/hmmer/)\n2. Functional annotation:\n - ([`InterProScan`](https://interproscan-docs.readthedocs.io/en/v5/)) a software tool used to analyze protein sequences by scanning them against the signatures of protein families, domains, and sites in the [InterPro](https://www.ebi.ac.uk/interpro/) database, helping to identify their functional characteristics.\n3. Predict secondary structure compositional features such as \u03b1-helices, \u03b2-strands and coils with ([`s4pred`](https://github.com/psipred/s4pred))\n4. Present QC stats for input sequences before and after initial pre-processing with ([`MultiQC`](http://multiqc.info/))\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. 
Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\nid,fasta\nspecies1,species1_proteins.fasta\nspecies2,species2_proteins.fasta\n```\n\nEach row represents a FASTA file of proteins from a single species.\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/proteinannotator \\\n -profile \\\n --input samplesheet.csv \\\n --outdir \n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/proteinannotator/usage) and the [parameter documentation](https://nf-co.re/proteinannotator/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/proteinannotator/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/proteinannotator/output).\n\n## Credits\n\nnf-core/proteinannotator was originally written by Olga Botvinnik and Evangelos Karatzas.\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n- [Michael L Heuer](https://github.com/heuermh)\n- [Edmund Miller](https://github.com/edmundmiller)\n- [Eric Wei](https://github.com/eweizy)\n- [Martin Beracochea](https://github.com/mberacochea)\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing 
guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#proteinannotator` channel](https://nfcore.slack.com/channels/proteinannotator) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\nIf you use nf-core/proteinannotator for your analysis, please cite it using the following doi: [10.5281/zenodo.18547735](https://doi.org/10.5281/zenodo.18547735)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", + "description": "

    \n \n \n \"nf-core/proteinannotator\"\n \n

    \n\n[![Open in GitHub Codespaces](https://img.shields.io/badge/Open_In_GitHub_Codespaces-black?labelColor=grey&logo=github)](https://github.com/codespaces/new/nf-core/proteinannotator)\n[![GitHub Actions CI Status](https://github.com/nf-core/proteinannotator/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/proteinannotator/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/proteinannotator/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/proteinannotator/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/proteinannotator/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.18547735-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.18547735)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.10.0-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.5.2-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.5.2)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera 
Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/proteinannotator)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23proteinannotator-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/proteinannotator)[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/proteinannotator** is a bioinformatics pipeline that computes statistics for protein FASTA inputs and produces protein annotations based on predicted sequence features, including conserved domains, functions, and secondary structure.\n\n

    \n \n \n \"nf-core/proteinannotator\"\n \n

    \n\n### Check quality and pre-process\n\nGenerate input amino acid sequence statistics with ([`SeqFu`](https://github.com/telatin/seqfu2/)) and pre-process them (i.e., gap removal, convert to upper case, validate, filter by length, replace special characters such as `/`, and remove duplicate sequences) with ([`SeqKit`](https://github.com/shenwei356/seqkit/))\n\n### Annotate sequences\n\n1. Conserved domain annotation with ([`hmmer`](https://github.com/EddyRivasLab/hmmer/)) against databases\n such as [Pfam](https://ftp.ebi.ac.uk/pub/databases/Pfam/), [FunFam](https://download.cathdb.info/cath/releases/all-releases/), and [NMPFams and MetagRoot](https://pavlopoulos-lab.org/envofams/databases/hmmer/)\n2. Functional annotation:\n - ([`InterProScan`](https://interproscan-docs.readthedocs.io/en/v5/)) a software tool used to analyze protein sequences by scanning them against the signatures of protein families, domains, and sites in the [InterPro](https://www.ebi.ac.uk/interpro/) database, helping to identify their functional characteristics.\n3. Predict secondary structure compositional features such as \u03b1-helices, \u03b2-strands and coils with ([`s4pred`](https://github.com/psipred/s4pred))\n4. Present QC stats for input sequences before and after initial pre-processing with ([`MultiQC`](http://multiqc.info/))\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. 
Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\nid,fasta\nspecies1,species1_proteins.fasta\nspecies2,species2_proteins.fasta\n```\n\nEach row represents a FASTA file of proteins from a single species.\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/proteinannotator \\\n -profile \\\n --input samplesheet.csv \\\n --outdir \n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/proteinannotator/usage) and the [parameter documentation](https://nf-co.re/proteinannotator/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/proteinannotator/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/proteinannotator/output).\n\n## Credits\n\nnf-core/proteinannotator was originally written by Olga Botvinnik and Evangelos Karatzas.\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n- [Michael L Heuer](https://github.com/heuermh)\n- [Edmund Miller](https://github.com/edmundmiller)\n- [Eric Wei](https://github.com/eweizy)\n- [Martin Beracochea](https://github.com/mberacochea)\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing 
guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#proteinannotator` channel](https://nfcore.slack.com/channels/proteinannotator) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\nIf you use nf-core/proteinannotator for your analysis, please cite it using the following doi: [10.5281/zenodo.18547735](https://doi.org/10.5281/zenodo.18547735)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", "hasPart": [ { "@id": "main.nf" From 406b4cf2b72f1d5e3c9a4482ec9dee353f4df909 Mon Sep 17 00:00:00 2001 From: angelphanth Date: Tue, 31 Mar 2026 09:16:30 +0100 Subject: [PATCH 11/14] add missing skip param --- main.nf | 1 + 1 file changed, 1 insertion(+) diff --git a/main.nf b/main.nf index 9f7f19b..e29a062 100644 --- a/main.nf +++ b/main.nf @@ -49,6 +49,7 @@ workflow NFCORE_PROTEINANNOTATOR { params.skip_nmpfams, params.nmpfams_db, params.nmpfams_latest_link, + params.skip_metagroot, params.metagroot_db, params.metagroot_latest_link, params.skip_interproscan, From 0ecbfdf762528d8c0635e31242969a086a18f3da Mon Sep 17 00:00:00 2001 From: angelphanth Date: Tue, 31 Mar 2026 10:40:13 +0100 Subject: [PATCH 12/14] rename metagrootdb_test to metagroot_test --- conf/test.config | 2 +- conf/test_full.config | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/conf/test.config b/conf/test.config index 27bda92..23f4a82 100644 --- a/conf/test.config +++ b/conf/test.config @@ -28,7 +28,7 @@ params { 
pfam_latest_link = params.pipelines_testdata_base_path + 'proteinannotator/testdata/pfam/Pfam-A_test.hmm.gz' funfam_latest_link = params.pipelines_testdata_base_path + 'proteinannotator/testdata/funfam/funfam-hmm3-v4_3_0_test.lib.gz' nmpfams_latest_link = params.pipelines_testdata_base_path + 'proteinannotator/testdata/nmpfams/nmpfamsdb_test.hmm.gz' - metagroot_latest_link = params.pipelines_testdata_base_path + 'proteinannotator/testdata/metagroot/metagrootdb_test.hmm.gz' + metagroot_latest_link = params.pipelines_testdata_base_path + 'proteinannotator/testdata/metagroot/metagroot_test.hmm.gz' // Functional annotation interproscan_db_url = params.pipelines_testdata_base_path + 'proteinannotator/testdata/interproscan/interproscan_test.tar.gz' interproscan_applications = 'Hamap,TIGRFAM,sfld' diff --git a/conf/test_full.config b/conf/test_full.config index 02e244f..966ee53 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -20,7 +20,7 @@ params { pfam_latest_link = params.pipelines_testdata_base_path + 'proteinannotator/testdata/pfam/Pfam-A_test.hmm.gz' funfam_latest_link = params.pipelines_testdata_base_path + 'proteinannotator/testdata/funfam/funfam-hmm3-v4_3_0_test.lib.gz' nmpfams_latest_link = params.pipelines_testdata_base_path + 'proteinannotator/testdata/nmpfams/nmpfamsdb_test.hmm.gz' - metagroot_latest_link = params.pipelines_testdata_base_path + 'proteinannotator/testdata/metagroot/metagrootdb_test.hmm.gz' + metagroot_latest_link = params.pipelines_testdata_base_path + 'proteinannotator/testdata/metagroot/metagroot_test.hmm.gz' // Functional annotation interproscan_db_url = params.pipelines_testdata_base_path + 'proteinannotator/testdata/interproscan_test.tar.gz' interproscan_applications = 'Hamap,TIGRFAM,sfld' From 60f38c7d18a176ef6c0dc54b3454d941c3f15bbd Mon Sep 17 00:00:00 2001 From: vagkaratzas Date: Tue, 31 Mar 2026 11:54:17 +0100 Subject: [PATCH 13/14] pipeline-level snapshot updated --- tests/default.nf.test.snap | 18 +++++++++++++++--- 
1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/tests/default.nf.test.snap b/tests/default.nf.test.snap index 10d5d5a..48525fe 100644 --- a/tests/default.nf.test.snap +++ b/tests/default.nf.test.snap @@ -1,7 +1,7 @@ { "-profile test": { "content": [ - 36, + 40, { "ARIA2": { "aria2": "1.36.0" @@ -9,6 +9,9 @@ "ARIA2_FUNFAM": { "aria2": "1.36.0" }, + "ARIA2_METAGROOT": { + "aria2": "1.36.0" + }, "ARIA2_NMPFAMS": { "aria2": "1.36.0" }, @@ -18,6 +21,9 @@ "HMMSEARCH_FUNFAM": { "hmmer": 3.4 }, + "HMMSEARCH_METAGROOT": { + "hmmer": 3.4 + }, "HMMSEARCH_NMPFAMS": { "hmmer": 3.4 }, @@ -58,6 +64,10 @@ "domain_annotation/funfam/T1024.domtbl.gz", "domain_annotation/funfam/T1026.domtbl.gz", "domain_annotation/funfam/l_arginase.domtbl.gz", + "domain_annotation/metagroot", + "domain_annotation/metagroot/T1024.domtbl.gz", + "domain_annotation/metagroot/T1026.domtbl.gz", + "domain_annotation/metagroot/l_arginase.domtbl.gz", "domain_annotation/nmpfams", "domain_annotation/nmpfams/T1024.domtbl.gz", "domain_annotation/nmpfams/T1026.domtbl.gz", @@ -88,6 +98,7 @@ "downloaded_dbs/interproscan_db/tigrfam/15.0/TIGRFAMs_15.0_HMM.LIB", "downloaded_dbs/interproscan_db/tigrfam/15.0/TIGRFAMs_HMM.LIB", "downloaded_dbs/interproscan_test.tar.gz", + "downloaded_dbs/metagroot_test.hmm.gz", "downloaded_dbs/nmpfamsdb_test.hmm.gz", "functional_annotation", "functional_annotation/interproscan", @@ -193,6 +204,7 @@ "TIGRFAMs_15.0_HMM.LIB:md5,64f2b2c9e834b47b17d91bb9a6a0067e", "TIGRFAMs_HMM.LIB:md5,543da3f4b65eed9ec393986c6c6ff0ba", "interproscan_test.tar.gz:md5,cde88c0cd841c84dc1203e64854c762b", + "metagroot_test.hmm.gz:md5,d23de95bf39fb6e27ffb266ce61ac98e", "nmpfamsdb_test.hmm.gz:md5,ad7a094618ccfdaeed1c03e93f6abf1e", "T1024.json:md5,0288f7551a14faedc409dd374b3e073e", "T1024.xml:md5,63a3db0eb0e1f76403411602c23b721e", @@ -232,8 +244,8 @@ ], "meta": { "nf-test": "0.9.3", - "nextflow": "25.10.2" + "nextflow": "25.10.4" }, - "timestamp": "2026-03-14T10:06:42.466898492" + "timestamp": 
"2026-03-31T11:50:46.606922418" } } \ No newline at end of file From 972f149ab8873019043ced03d38cff58d212afa7 Mon Sep 17 00:00:00 2001 From: vagkaratzas Date: Tue, 31 Mar 2026 12:03:10 +0100 Subject: [PATCH 14/14] rename MetagRoot to metagRoot --- CHANGELOG.md | 2 +- README.md | 2 +- docs/output.md | 6 +++--- docs/usage.md | 2 +- nextflow_schema.json | 10 +++++----- ro-crate-metadata.json | 2 +- subworkflows/local/domain_annotation/meta.yml | 6 +++--- 7 files changed, 15 insertions(+), 15 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 69eb988..88578a1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` -- [#90](https://github.com/nf-core/proteinannotator/pull/90) - Added the option to download and use the latest `MetagRoot` HMM library (or use path to an existing one) for domain annotation. (by @angelphanth) +- [#90](https://github.com/nf-core/proteinannotator/pull/90) - Added the option to download and use the latest `metagRoot` HMM library (or use path to an existing one) for domain annotation. (by @angelphanth) - [#87](https://github.com/nf-core/proteinannotator/pull/87) - Added the option to download and use the latest `NMPFams` HMM library (or use path to an existing one) for domain annotation. (by @npechl) - [#85](https://github.com/nf-core/proteinannotator/pull/85) - Added zenodo doi in `nextflow.config`. (by @vagkaratzas) diff --git a/README.md b/README.md index f128e0a..dac12bd 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,7 @@ Generate input amino acid sequence statistics with ([`SeqFu`](https://github.com ### Annotate sequences 1. 
Conserved domain annotation with ([`hmmer`](https://github.com/EddyRivasLab/hmmer/)) against databases - such as [Pfam](https://ftp.ebi.ac.uk/pub/databases/Pfam/), [FunFam](https://download.cathdb.info/cath/releases/all-releases/), and [NMPFams and MetagRoot](https://pavlopoulos-lab.org/envofams/databases/hmmer/) + such as [Pfam](https://ftp.ebi.ac.uk/pub/databases/Pfam/), [FunFam](https://download.cathdb.info/cath/releases/all-releases/), and [NMPFams and metagRoot](https://pavlopoulos-lab.org/envofams/databases/hmmer/) 2. Functional annotation: - ([`InterProScan`](https://interproscan-docs.readthedocs.io/en/v5/)) a software tool used to analyze protein sequences by scanning them against the signatures of protein families, domains, and sites in the [InterPro](https://www.ebi.ac.uk/interpro/) database, helping to identify their functional characteristics. 3. Predict secondary structure compositional features such as α-helices, β-strands and coils with ([`s4pred`](https://github.com/psipred/s4pred)) diff --git a/docs/output.md b/docs/output.md index 3a8e1ec..85e3439 100644 --- a/docs/output.md +++ b/docs/output.md @@ -14,9 +14,9 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d - [SeqFu](#seqfu) for input amino acid sequences quality control (QC) - [SeqKit](#seqkit) for preprocessing input amino acid sequences (i.e., gap removal, convert to upper case, validate, filter by length, replace special characters such as `/`, and remove duplicate sequences) - [Database download](#database-download) Optionally download selected databases for annotation. - - [aria2](#aria2) - To optionally download the Pfam, FunFam, NMPFams, MetagRoot and/or InterProScan databases through the pipeline. + - [aria2](#aria2) - To optionally download the Pfam, FunFam, NMPFams, metagRoot and/or InterProScan databases through the pipeline. - [Domain annotation](#domain-annotation) Annotate proteins with domains from established repositories. 
- - [hmmer](#hmmer) - To optionally match the input sequence to known Pfam, FunFam, NMPFams and/or MetagRoot domains through `hmmer/hmmsearch` + - [hmmer](#hmmer) - To optionally match the input sequence to known Pfam, FunFam, NMPFams and/or metagRoot domains through `hmmer/hmmsearch` - [Functional annotation](#functional-annotation) Annotate proteins with functional domains - [InterProScan](#Interproscan) - Search the InterProScan database for functional domains - [s4pred](#s4pred) - Predict secondary structures of sequences, producing amino acid level probabilities of forming an α-helix, a β-strand or a coil. @@ -73,7 +73,7 @@ The `seqkit` module is used for initial preprocessing (i.e., gap removal, conver - `interproscan_test.tar.gz`: (optional) the downloaded InterProScan archive of member databases according to the optional user-provided url - `funfam-hmm3-v4_3_0*.lib.gz`: (optional) The latest (v4_3_0) full, or a minimal test, FunFam HMM database that can be downloaded through the pipeline. - `nmpfamsdb.hmm.gz`: (optional) The latest full, or a minimal test, NMPFams HMM database that can be downloaded through the pipeline. - - `metagroot.hmm.gz`: (optional) The latest full, or a minimal test, MetagRoot HMM database that can be downloaded through the pipeline. + - `metagroot.hmm.gz`: (optional) The latest full, or a minimal test, metagRoot HMM database that can be downloaded through the pipeline. diff --git a/docs/usage.md b/docs/usage.md index a2c17cc..8978d1e 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -7,7 +7,7 @@ ## Introduction **nf-core/proteinannotator** is a bioinformatics pipeline that computes statistics and generates sequence-level annotations for amino acid sequences. -It takes a protein FASTA file as input and performs conserved domain annotation (using Pfam, FunFam, NMPFams and MetagRoot HMM databases), functional annotation (using InterProScan), and secondary structure prediction (using s4pred). 
+It takes a protein FASTA file as input and performs conserved domain annotation (using Pfam, FunFam, NMPFams and metagRoot HMM databases), functional annotation (using InterProScan), and secondary structure prediction (using s4pred). Optionally, paths to pre-downloaded databases can be provided to skip the automatic download steps and speed up repeated runs. ## Samplesheet input diff --git a/nextflow_schema.json b/nextflow_schema.json index ffd915d..1d79c7a 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -296,19 +296,19 @@ "skip_metagroot": { "type": "boolean", "fa_icon": "fas fa-ban", - "description": "Skip the domain annotation with the MetagRoot database.", - "help": "Skips the domain annotation of input sequence against a MetagRoot database." + "description": "Skip the domain annotation with the metagRoot database.", + "help": "Skips the domain annotation of input sequence against a metagRoot database." }, "metagroot_db": { "type": "string", "format": "file-path", - "description": "Path to an already installed MetagRoot HMM database (.hmm.gz).", - "help_text": "If left null and skip_metagroot is false, the pipeline will start downloading the latest MetagRoot HMM library." + "description": "Path to an already installed metagRoot HMM database (.hmm.gz).", + "help_text": "If left null and skip_metagroot is false, the pipeline will start downloading the latest metagRoot HMM library." }, "metagroot_latest_link": { "type": "string", "default": "https://pavlopoulos-lab.org/envofams/databases/hmmer/metagroot.hmm.gz", - "description": "MetagRoot hosted link to the latest available MetagRoot HMM database file." + "description": "metagRoot hosted link to the latest available metagRoot HMM database file." 
}, "hmmsearch_evalue_cutoff": { "type": "number", diff --git a/ro-crate-metadata.json b/ro-crate-metadata.json index 6cb8579..e3f06a1 100644 --- a/ro-crate-metadata.json +++ b/ro-crate-metadata.json @@ -23,7 +23,7 @@ "@type": "Dataset", "creativeWorkStatus": "InProgress", "datePublished": "2026-02-09T13:54:13+00:00", - "description": "

    \n \n \n \"nf-core/proteinannotator\"\n \n

    \n\n[![Open in GitHub Codespaces](https://img.shields.io/badge/Open_In_GitHub_Codespaces-black?labelColor=grey&logo=github)](https://github.com/codespaces/new/nf-core/proteinannotator)\n[![GitHub Actions CI Status](https://github.com/nf-core/proteinannotator/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/proteinannotator/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/proteinannotator/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/proteinannotator/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/proteinannotator/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.18547735-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.18547735)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.10.0-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.5.2-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.5.2)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera 
Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/proteinannotator)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23proteinannotator-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/proteinannotator)[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/proteinannotator** is a bioinformatics pipeline that computes statistics for protein FASTA inputs and produces protein annotations based on predicted sequence features, including conserved domains, functions, and secondary structure.\n\n

    \n \n \n \"nf-core/proteinannotator\"\n \n

    \n\n### Check quality and pre-process\n\nGenerate input amino acid sequence statistics with ([`SeqFu`](https://github.com/telatin/seqfu2/)) and pre-process them (i.e., gap removal, convert to upper case, validate, filter by length, replace special characters such as `/`, and remove duplicate sequences) with ([`SeqKit`](https://github.com/shenwei356/seqkit/))\n\n### Annotate sequences\n\n1. Conserved domain annotation with ([`hmmer`](https://github.com/EddyRivasLab/hmmer/)) against databases\n such as [Pfam](https://ftp.ebi.ac.uk/pub/databases/Pfam/), [FunFam](https://download.cathdb.info/cath/releases/all-releases/), and [NMPFams and MetagRoot](https://pavlopoulos-lab.org/envofams/databases/hmmer/)\n2. Functional annotation:\n - ([`InterProScan`](https://interproscan-docs.readthedocs.io/en/v5/)) a software tool used to analyze protein sequences by scanning them against the signatures of protein families, domains, and sites in the [InterPro](https://www.ebi.ac.uk/interpro/) database, helping to identify their functional characteristics.\n3. Predict secondary structure compositional features such as \u03b1-helices, \u03b2-strands and coils with ([`s4pred`](https://github.com/psipred/s4pred))\n4. Present QC stats for input sequences before and after initial pre-processing with ([`MultiQC`](http://multiqc.info/))\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. 
Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\nid,fasta\nspecies1,species1_proteins.fasta\nspecies2,species2_proteins.fasta\n```\n\nEach row represents a FASTA file of proteins from a single species.\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/proteinannotator \\\n -profile <docker/singularity/.../institute> \\\n --input samplesheet.csv \\\n --outdir <OUTDIR>\n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/proteinannotator/usage) and the [parameter documentation](https://nf-co.re/proteinannotator/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/proteinannotator/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/proteinannotator/output).\n\n## Credits\n\nnf-core/proteinannotator was originally written by Olga Botvinnik and Evangelos Karatzas.\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n- [Michael L Heuer](https://github.com/heuermh)\n- [Edmund Miller](https://github.com/edmundmiller)\n- [Eric Wei](https://github.com/eweizy)\n- [Martin Beracochea](https://github.com/mberacochea)\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing 
guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#proteinannotator` channel](https://nfcore.slack.com/channels/proteinannotator) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\nIf you use nf-core/proteinannotator for your analysis, please cite it using the following doi: [10.5281/zenodo.18547735](https://doi.org/10.5281/zenodo.18547735)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", + "description": "

    \n \n \n \"nf-core/proteinannotator\"\n \n

    \n\n[![Open in GitHub Codespaces](https://img.shields.io/badge/Open_In_GitHub_Codespaces-black?labelColor=grey&logo=github)](https://github.com/codespaces/new/nf-core/proteinannotator)\n[![GitHub Actions CI Status](https://github.com/nf-core/proteinannotator/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/proteinannotator/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/proteinannotator/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/proteinannotator/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/proteinannotator/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.18547735-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.18547735)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.10.0-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.5.2-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.5.2)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera 
Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/proteinannotator)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23proteinannotator-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/proteinannotator)[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/proteinannotator** is a bioinformatics pipeline that computes statistics for protein FASTA inputs and produces protein annotations based on predicted sequence features, including conserved domains, functions, and secondary structure.\n\n

    \n \n \n \"nf-core/proteinannotator\"\n \n

    \n\n### Check quality and pre-process\n\nGenerate input amino acid sequence statistics with ([`SeqFu`](https://github.com/telatin/seqfu2/)) and pre-process them (i.e., gap removal, convert to upper case, validate, filter by length, replace special characters such as `/`, and remove duplicate sequences) with ([`SeqKit`](https://github.com/shenwei356/seqkit/))\n\n### Annotate sequences\n\n1. Conserved domain annotation with ([`hmmer`](https://github.com/EddyRivasLab/hmmer/)) against databases\n such as [Pfam](https://ftp.ebi.ac.uk/pub/databases/Pfam/), [FunFam](https://download.cathdb.info/cath/releases/all-releases/), and [NMPFams and metagRoot](https://pavlopoulos-lab.org/envofams/databases/hmmer/)\n2. Functional annotation:\n - ([`InterProScan`](https://interproscan-docs.readthedocs.io/en/v5/)) a software tool used to analyze protein sequences by scanning them against the signatures of protein families, domains, and sites in the [InterPro](https://www.ebi.ac.uk/interpro/) database, helping to identify their functional characteristics.\n3. Predict secondary structure compositional features such as \u03b1-helices, \u03b2-strands and coils with ([`s4pred`](https://github.com/psipred/s4pred))\n4. Present QC stats for input sequences before and after initial pre-processing with ([`MultiQC`](http://multiqc.info/))\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. 
Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\nid,fasta\nspecies1,species1_proteins.fasta\nspecies2,species2_proteins.fasta\n```\n\nEach row represents a FASTA file of proteins from a single species.\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/proteinannotator \\\n -profile <docker/singularity/.../institute> \\\n --input samplesheet.csv \\\n --outdir <OUTDIR>\n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/proteinannotator/usage) and the [parameter documentation](https://nf-co.re/proteinannotator/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/proteinannotator/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/proteinannotator/output).\n\n## Credits\n\nnf-core/proteinannotator was originally written by Olga Botvinnik and Evangelos Karatzas.\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n- [Michael L Heuer](https://github.com/heuermh)\n- [Edmund Miller](https://github.com/edmundmiller)\n- [Eric Wei](https://github.com/eweizy)\n- [Martin Beracochea](https://github.com/mberacochea)\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing 
guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#proteinannotator` channel](https://nfcore.slack.com/channels/proteinannotator) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\nIf you use nf-core/proteinannotator for your analysis, please cite it using the following doi: [10.5281/zenodo.18547735](https://doi.org/10.5281/zenodo.18547735)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", "hasPart": [ { "@id": "main.nf" diff --git a/subworkflows/local/domain_annotation/meta.yml b/subworkflows/local/domain_annotation/meta.yml index b60228b..630237b 100644 --- a/subworkflows/local/domain_annotation/meta.yml +++ b/subworkflows/local/domain_annotation/meta.yml @@ -57,15 +57,15 @@ input: - skip_metagroot: type: boolean description: | - Skip domain annotation with MetagRoot + Skip domain annotation with metagRoot - metagroot_db: type: string description: | - Path to an existing HMM MetagRoot library on the system. If provided, the ARIA2_METAGROOT db download will be skipped. + Path to an existing HMM metagRoot library on the system. If provided, the ARIA2_METAGROOT db download will be skipped. - metagroot_latest_link: type: string description: | - Path to the latest MetagRoot HMM database, to download + Path to the latest metagRoot HMM database, to download output: - pfam_domains: type: file