From d892c1bbf3fb663aff56d6e72ea075a71af0f94a Mon Sep 17 00:00:00 2001
From: lyh970817
Date: Mon, 8 Jun 2026 00:05:14 +0800
Subject: [PATCH] Add gcta/grmcutoff module

---
 .../nf-core/gcta/grmcutoff/environment.yml    |   7 +
 modules/nf-core/gcta/grmcutoff/main.nf        |  52 ++++++
 modules/nf-core/gcta/grmcutoff/meta.yml       |  86 +++++++++
 .../nf-core/gcta/grmcutoff/tests/main.nf.test | 163 ++++++++++++++++++
 .../gcta/grmcutoff/tests/main.nf.test.snap    |  78 +++++++++
 5 files changed, 386 insertions(+)
 create mode 100644 modules/nf-core/gcta/grmcutoff/environment.yml
 create mode 100644 modules/nf-core/gcta/grmcutoff/main.nf
 create mode 100644 modules/nf-core/gcta/grmcutoff/meta.yml
 create mode 100644 modules/nf-core/gcta/grmcutoff/tests/main.nf.test
 create mode 100644 modules/nf-core/gcta/grmcutoff/tests/main.nf.test.snap

diff --git a/modules/nf-core/gcta/grmcutoff/environment.yml b/modules/nf-core/gcta/grmcutoff/environment.yml
new file mode 100644
index 000000000000..3e22ea7b9f20
--- /dev/null
+++ b/modules/nf-core/gcta/grmcutoff/environment.yml
@@ -0,0 +1,7 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - bioconda::gcta=1.94.1
diff --git a/modules/nf-core/gcta/grmcutoff/main.nf b/modules/nf-core/gcta/grmcutoff/main.nf
new file mode 100644
index 000000000000..21806a5d056f
--- /dev/null
+++ b/modules/nf-core/gcta/grmcutoff/main.nf
@@ -0,0 +1,52 @@
+// Runs `gcta --grm-cutoff` on a staged dense GRM bundle and emits the GRM files written under `prefix`.
+// `meta.id` is passed to `--grm`, so it must be the basename of the staged `grm_files`.
+process GCTA_GRMCUTOFF {
+    tag "${meta.id}"
+    label 'process_medium'
+    conda "${moduleDir}/environment.yml"
+    container "${workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container
+        ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/46/46b0d05f0daa47561d87d2a9cac5e51edc2c78e26f1bbab439c688386241a274/data'
+        : 'community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9'}"
+
+    input:
+    tuple val(meta), path(grm_files)
+    val cutoff
+
+    output:
+    tuple val(meta), path("${prefix}.grm.*"), emit: grm_files
+    tuple val(meta), path("${prefix}.grm.id"), emit: keep_file
+    tuple val("${task.process}"), val("gcta"), eval("gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'"), emit: versions_gcta, topic: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    prefix = task.ext.prefix ?: "${meta.id}_grmcutoff"
+    // The input bundle is read by its meta.id basename and the result is written under prefix;
+    // identical names would collide with the staged inputs and the output glob would match them.
+    if ("${prefix}" == "${meta.id}") {
+        error("Input and output names are the same, use \"task.ext.prefix\" to disambiguate!")
+    }
+    """
+    gcta \\
+        --grm ${meta.id} \\
+        --grm-cutoff ${cutoff} \\
+        --make-grm \\
+        --out ${prefix} \\
+        --thread-num ${task.cpus} \\
+        ${args}
+    """
+
+    stub:
+    prefix = task.ext.prefix ?: "${meta.id}_grmcutoff"
+    // Same guard as in `script:` so a colliding prefix is also reported in stub runs.
+    if ("${prefix}" == "${meta.id}") {
+        error("Input and output names are the same, use \"task.ext.prefix\" to disambiguate!")
+    }
+    """
+    touch ${prefix}.grm.id
+    touch ${prefix}.grm.bin
+    touch ${prefix}.grm.N.bin
+    """
+}
diff --git a/modules/nf-core/gcta/grmcutoff/meta.yml b/modules/nf-core/gcta/grmcutoff/meta.yml
new file mode 100644
index 000000000000..90c5fc386abc
--- /dev/null
+++ b/modules/nf-core/gcta/grmcutoff/meta.yml
@@ -0,0 +1,86 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "gcta_grmcutoff"
+description: Apply a genetic relationship cutoff to a dense GRM using `gcta --grm-cutoff`
+keywords:
+  - gcta
+  - genome-wide complex trait analysis
+  - grm
+  - genetic relationship matrix
+  - genetics
+tools:
+  - "gcta":
+      description: "Genome-wide Complex Trait Analysis (GCTA) estimates genetic relationships, variance components, and association statistics from genome-wide data."
+      homepage: "https://yanglab.westlake.edu.cn/software/gcta/"
+      documentation: "https://yanglab.westlake.edu.cn/software/gcta/static/gcta_doc_latest.pdf"
+      tool_dev_url: "https://yanglab.westlake.edu.cn/software/gcta/"
+      licence: ["GPL-3.0-only"]
+      identifier: "biotools:gcta"
+
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy map containing dense GRM metadata.
+          `meta.id` is the required GRM basename consumed by `--grm` and must match
+          the staged dense GRM files.
+          e.g. `[ id:'tiny_dense' ]` requires
+          `tiny_dense.grm.id`, `tiny_dense.grm.bin`, and `tiny_dense.grm.N.bin`.
+    - grm_files:
+        type: file
+        description: Dense GRM file bundle with basename `${meta.id}`
+        pattern: "*.grm.*"
+        ontologies: []
+  - cutoff:
+      type: float
+      description: Genetic relationship cutoff value passed to `gcta --grm-cutoff`.
+
+output:
+  grm_files:
+    - - meta:
+          type: map
+          description: |
+            Groovy map containing dense GRM metadata.
+            `meta.id` remains the dense-GRM basename contract used for `--grm`.
+      - "${prefix}.grm.*":
+          type: file
+          description: Relatedness-filtered GRM file bundle
+          pattern: "${prefix}.grm.*"
+          ontologies: []
+  keep_file:
+    - - meta:
+          type: map
+          description: |
+            Groovy map containing dense GRM metadata.
+            `meta.id` remains the dense-GRM basename contract used for `--grm`.
+      - "${prefix}.grm.id":
+          type: file
+          description: Keep file of unrelated individuals emitted by GCTA
+          pattern: "${prefix}.grm.id"
+          ontologies: []
+  versions_gcta:
+    - - "${task.process}":
+          type: string
+          description: The process the version was collected from
+      - "gcta":
+          type: string
+          description: The tool name
+      - "gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'":
+          type: eval
+          description: The command used to retrieve the GCTA version
+
+topics:
+  versions:
+    - - ${task.process}:
+          type: string
+          description: The process the version was collected from
+      - gcta:
+          type: string
+          description: The tool name
+      - "gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'":
+          type: eval
+          description: The command used to retrieve the GCTA version
+
+authors:
+  - "@lyh970817"
+maintainers:
+  - "@lyh970817"
diff --git a/modules/nf-core/gcta/grmcutoff/tests/main.nf.test b/modules/nf-core/gcta/grmcutoff/tests/main.nf.test
new file mode 100644
index 000000000000..66182604e8d0
--- /dev/null
+++ b/modules/nf-core/gcta/grmcutoff/tests/main.nf.test
@@ -0,0 +1,163 @@
+nextflow_process {
+
+    name "Test Process GCTA_GRMCUTOFF"
+    script "../main.nf"
+    process "GCTA_GRMCUTOFF"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "gcta"
+    tag "gcta/grmcutoff"
+    tag "gcta/makegrm"
+
+    setup {
+        run("GCTA_MAKEGRM", alias: "GCTA_MAKEGRM_CONTRACT") {
+            script "../../makegrm/main.nf"
+            process {
+                """
+                file('tiny_dense.mbfile').text = 'plink_simulated\\n'
+
+                input[0] = [
+                    [ id:'tiny_dense' ],
+                    file('tiny_dense.mbfile'),
+                    [
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true)
+                    ],
+                    [
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true)
+                    ],
+                    [
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true)
+                    ]
+                ]
+                """
+            }
+        }
+
+        run("GCTA_MAKEGRM", alias: "GCTA_MAKEGRM_STUB") {
+            script "../../makegrm/main.nf"
+            process {
+                """
+                file('stub_dense.mbfile').text = 'plink_simulated\\n'
+
+                input[0] = [
+                    [ id:'stub_dense' ],
+                    file('stub_dense.mbfile'),
+                    [
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true)
+                    ],
+                    [
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true)
+                    ],
+                    [
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true)
+                    ]
+                ]
+                """
+            }
+        }
+    }
+
+    test("homo_sapiens popgen - apply GRM cutoff to dense GRM") {
+        when {
+            process {
+                """
+                input[0] = GCTA_MAKEGRM_CONTRACT.out.grm_files
+                input[1] = 0.05
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert process.out.grm_files.size() == 1 },
+                { assert process.out.keep_file.size() == 1 },
+                { assert process.out.grm_files.get(0).get(0).id == "tiny_dense" },
+                {
+                    def grm_row = process.out.grm_files.get(0)
+                    def expected_prefix = "${grm_row.get(0).id}_grmcutoff"
+                    assert grm_row.get(1).collect { file(it).name }.sort() == [
+                        "${expected_prefix}.grm.N.bin",
+                        "${expected_prefix}.grm.bin",
+                        "${expected_prefix}.grm.id"
+                    ]
+                    assert file(process.out.keep_file.get(0).get(1)).name == "${expected_prefix}.grm.id"
+                },
+                {
+                    assert snapshot(sanitizeOutput(process.out)).match()
+                }
+            )
+        }
+    }
+
+    test("homo_sapiens popgen - GRM cutoff fails when meta.id is not GRM basename") {
+        when {
+            process {
+                """
+                input[0] = GCTA_MAKEGRM_CONTRACT.out.grm_files.map { meta, grm_files ->
+                    [[ id:'tiny_dense_mismatched' ], grm_files]
+                }
+                input[1] = 0.05
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert !process.success },
+                { assert process.exitStatus != 0 }
+            )
+        }
+    }
+
+    test("homo_sapiens popgen - GRM cutoff fails for malformed GRM tuple") {
+        when {
+            process {
+                """
+                input[0] = GCTA_MAKEGRM_CONTRACT.out.grm_files.map { meta, grm_files ->
+                    [[ id:meta.id ]]
+                }
+                input[1] = 0.05
+                """
+            }
+        }
+
+        then {
+            assert !process.success
+        }
+    }
+
+    test("homo_sapiens popgen - apply GRM cutoff to dense GRM - stub") {
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = GCTA_MAKEGRM_STUB.out.grm_files
+                input[1] = 0.125
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert process.out.grm_files.get(0).get(0).id == "stub_dense" },
+                {
+                    def grm_row = process.out.grm_files.get(0)
+                    def expected_prefix = "${grm_row.get(0).id}_grmcutoff"
+                    assert grm_row.get(1).collect { file(it).name }.sort() == [
+                        "${expected_prefix}.grm.N.bin",
+                        "${expected_prefix}.grm.bin",
+                        "${expected_prefix}.grm.id"
+                    ]
+                    assert file(process.out.keep_file.get(0).get(1)).name == "${expected_prefix}.grm.id"
+                },
+                {
+                    assert snapshot(sanitizeOutput(process.out)).match()
+                }
+            )
+        }
+    }
+}
diff --git a/modules/nf-core/gcta/grmcutoff/tests/main.nf.test.snap b/modules/nf-core/gcta/grmcutoff/tests/main.nf.test.snap
new file mode 100644
index 000000000000..eccc773841fb
--- /dev/null
+++ b/modules/nf-core/gcta/grmcutoff/tests/main.nf.test.snap
@@ -0,0 +1,78 @@
+{
+    "homo_sapiens popgen - apply GRM cutoff to dense GRM - stub": {
+        "content": [
+            {
+                "grm_files": [
+                    [
+                        {
+                            "id": "stub_dense"
+                        },
+                        [
+                            "stub_dense_grmcutoff.grm.N.bin:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "stub_dense_grmcutoff.grm.bin:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "stub_dense_grmcutoff.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e"
+                        ]
+                    ]
+                ],
+                "keep_file": [
+                    [
+                        {
+                            "id": "stub_dense"
+                        },
+                        "stub_dense_grmcutoff.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "versions_gcta": [
+                    [
+                        "GCTA_GRMCUTOFF",
+                        "gcta",
+                        "1.94.1"
+                    ]
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.3",
+            "nextflow": "25.10.4"
+        },
+        "timestamp": "2026-06-07T23:52:40.106468066"
+    },
+    "homo_sapiens popgen - apply GRM cutoff to dense GRM": {
+        "content": [
+            {
+                "grm_files": [
+                    [
+                        {
+                            "id": "tiny_dense"
+                        },
+                        [
+                            "tiny_dense_grmcutoff.grm.N.bin:md5,06b73ea8bae8f1e5f5d4de33dbd2c75e",
+                            "tiny_dense_grmcutoff.grm.bin:md5,b1f124463eecbae86840a6651eec372d",
+                            "tiny_dense_grmcutoff.grm.id:md5,ca8c0bded6951fdd3bf0dddc97b6df6b"
+                        ]
+                    ]
+                ],
+                "keep_file": [
+                    [
+                        {
+                            "id": "tiny_dense"
+                        },
+                        "tiny_dense_grmcutoff.grm.id:md5,ca8c0bded6951fdd3bf0dddc97b6df6b"
+                    ]
+                ],
+                "versions_gcta": [
+                    [
+                        "GCTA_GRMCUTOFF",
+                        "gcta",
+                        "1.94.1"
+                    ]
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.3",
+            "nextflow": "25.10.4"
+        },
+        "timestamp": "2026-06-07T23:52:04.979644911"
+    }
+}
\ No newline at end of file