From 839ca40131434ee6e7b4cb36705f879ca1b5aca6 Mon Sep 17 00:00:00 2001 From: lyh970817 Date: Wed, 27 May 2026 02:38:16 +0800 Subject: [PATCH 01/13] Add REGENIE splitl0 module --- .../nf-core/regenie/splitl0/environment.yml | 7 + modules/nf-core/regenie/splitl0/main.nf | 58 +++++++ modules/nf-core/regenie/splitl0/meta.yml | 156 ++++++++++++++++++ .../regenie/splitl0/tests/main.nf.test | 148 +++++++++++++++++ .../regenie/splitl0/tests/main.nf.test.snap | 84 ++++++++++ .../regenie/splitl0/tests/nextflow.config | 5 + 6 files changed, 458 insertions(+) create mode 100644 modules/nf-core/regenie/splitl0/environment.yml create mode 100644 modules/nf-core/regenie/splitl0/main.nf create mode 100644 modules/nf-core/regenie/splitl0/meta.yml create mode 100644 modules/nf-core/regenie/splitl0/tests/main.nf.test create mode 100644 modules/nf-core/regenie/splitl0/tests/main.nf.test.snap create mode 100644 modules/nf-core/regenie/splitl0/tests/nextflow.config diff --git a/modules/nf-core/regenie/splitl0/environment.yml b/modules/nf-core/regenie/splitl0/environment.yml new file mode 100644 index 000000000000..98fe8277cc05 --- /dev/null +++ b/modules/nf-core/regenie/splitl0/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - "bioconda::regenie=4.1.2" diff --git a/modules/nf-core/regenie/splitl0/main.nf b/modules/nf-core/regenie/splitl0/main.nf new file mode 100644 index 000000000000..38f3435f52f4 --- /dev/null +++ b/modules/nf-core/regenie/splitl0/main.nf @@ -0,0 +1,58 @@ +process REGENIE_SPLITL0 { + tag "${meta.id}" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/7a/7a05bf71ea09adc5ebf9f0c656c9b326c0f16ba8e4966914972e58313469a466/data' + : 'community.wave.seqera.io/library/regenie:4.1.2--5d361f9fcb2f85cf'}" + + input: + tuple val(meta), path(plink_genotype_file), path(plink_variant_file), path(plink_sample_file) + tuple val(meta2), path(pheno) + tuple val(meta3), path(covar) + val bsize + val n_jobs + + output: + tuple val(meta), path("*.master"), emit: master + tuple val(meta), path("*_job*.snplist"), emit: snplists + tuple val(meta), path("*.log"), emit: log + tuple val("${task.process}"), val('regenie'), eval('regenie --version 2>&1 | sed -n "1{s/^v//;s/\\.gz$//;p}"'), topic: versions, emit: versions_regenie + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def input_prefix = plink_genotype_file.baseName + def prefix = task.ext.prefix ?: input_prefix + def genotype_flag = plink_genotype_file.name.endsWith('.pgen') ? '--pgen' : '--bed' + def covar_arg = covar ? "--covarFile ${covar}" : '' + def bsize_arg = bsize ?: 1000 + """ + regenie \\ + --step 1 \\ + ${genotype_flag} ${input_prefix} \\ + --phenoFile ${pheno} \\ + ${covar_arg} \\ + --bsize ${bsize_arg} \\ + --gz \\ + --threads ${task.cpus} \\ + ${args} \\ + --out ${prefix} \\ + --split-l0 ${prefix},${n_jobs} + """ + + stub: + def input_prefix = plink_genotype_file.baseName + def prefix = task.ext.prefix ?: input_prefix + def job_count = n_jobs as Integer + def snplist_lines = (1..job_count).collect { job -> "touch ${prefix}_job${job}.snplist" }.join('\n') + def master_lines = (1..job_count).collect { job -> "${prefix}_job${job} ${prefix}_job${job}.snplist" }.join('\\n') + """ + printf 'job snplist\\n${master_lines}\\n' > ${prefix}.master + ${snplist_lines} + touch ${prefix}.log + """ +} diff --git a/modules/nf-core/regenie/splitl0/meta.yml b/modules/nf-core/regenie/splitl0/meta.yml new file mode 100644 index 000000000000..7a2de6f6cce6 --- /dev/null +++ b/modules/nf-core/regenie/splitl0/meta.yml @@ -0,0 +1,156 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "regenie_splitl0" +description: Split REGENIE step 1 level-0 ridge-regression blocks into parallel jobs +keywords: + - regenie + - gwas + - association + - genomics + - parallel +tools: + - "regenie": + description: "Regenie is a C++ program for whole genome regression modelling of large genome-wide association studies (GWAS)." + homepage: "https://rgcgithub.github.io/regenie/" + documentation: "https://rgcgithub.github.io/regenie/options/" + tool_dev_url: "https://github.com/rgcgithub/regenie" + doi: "10.1038/s41588-021-00870-7" + licence: ["MIT"] + identifier: "biotools:regenie" + +input: + - - meta: + type: map + description: | + Groovy Map containing genotype information + Keep only the genotype analysis identifier in this map + REGENIE consumes the staged basename of `plink_genotype_file` as the `--bed` or `--pgen` prefix, so the `.bed/.bim/.fam` or `.pgen/.pvar/.psam` files must share one basename + e.g. `[ id:'cohort' ]` + - plink_genotype_file: + type: file + description: PLINK primary genotype file in BED or PGEN format + pattern: "*.{bed,pgen}" + ontologies: + - edam: "http://edamontology.org/format_3003" # BED + - plink_variant_file: + type: file + description: PLINK variant metadata file in BIM or PVAR format + pattern: "*.{bim,pvar,zst}" + ontologies: [] + - plink_sample_file: + type: file + description: PLINK sample metadata file in FAM or PSAM format + pattern: "*.{fam,psam}" + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing genotype/sample information associated with the phenotype file input + Use the same phenotype file and phenotype-selection arguments for all `regenie/splitl0`, `regenie/runl0`, and `regenie/runl1` jobs in the same chunked step 1 analysis + e.g. `[ id:'plink_simulated' ]` + - pheno: + type: file + description: Phenotype file passed to `--phenoFile` + pattern: "*.{phe,pheno,txt,tsv}" + ontologies: + - edam: "http://edamontology.org/format_3475" # TSV + - - meta3: + type: map + description: | + Groovy Map containing genotype/sample information associated with the covariate input + Use compatible covariate inputs for all stages in the same chunked step 1 analysis + e.g. `[ id:'plink_simulated' ]` + - covar: + type: file + optional: true + description: Optional covariate file passed to `--covarFile`; provide `[]` when absent + pattern: "*.{covar,cov,txt,tsv}" + ontologies: + - edam: "http://edamontology.org/format_3475" # TSV + - bsize: + type: integer + description: Optional block size passed to `--bsize`; pass `[]` to use the module default of `1000` + - n_jobs: + type: integer + description: Number of level-0 jobs requested with `--split-l0` + +output: + master: + - - meta: + type: map + description: | + Groovy Map containing genotype/sample information + e.g. `[ id:'plink_simulated' ]` + - "*.master": + type: file + description: REGENIE split level-0 master file + pattern: "*.master" + ontologies: + - edam: "http://edamontology.org/format_2330" # Text + snplists: + - - meta: + type: map + description: | + Groovy Map containing genotype/sample information + e.g. `[ id:'plink_simulated' ]` + - "*_job*.snplist": + type: file + description: REGENIE per-job variant list files referenced by the master file + pattern: "*_job*.snplist" + ontologies: + - edam: "http://edamontology.org/format_2330" # Text + log: + - - meta: + type: map + description: | + Groovy Map containing genotype information + e.g. `[ id:'plink_simulated' ]` + - "*.log": + type: file + description: REGENIE split level-0 log file + pattern: "*.log" + ontologies: + - edam: "http://edamontology.org/format_2330" # Text + versions_regenie: + - - "${task.process}": + type: string + description: The process the versions were collected from + - "regenie": + type: string + description: The tool name + - 'regenie --version 2>&1 | sed -n "1{s/^v//;s/\.gz$//;p}"': + type: eval + description: The command used to generate the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - regenie: + type: string + description: The tool name + - 'regenie --version 2>&1 | sed -n "1{s/^v//;s/\.gz$//;p}"': + type: eval + description: The command used to generate the version of the tool + +notes: | + `task.ext.args` is passed directly to REGENIE and can be used for stage-consistent options such as `--phenoColList`, `--bt`, `--loocv`, or `--keep-l0`. + The same phenotype file, phenotype-selection arguments, trait mode arguments such as `--bt`, and compatible genotype/covariate inputs must be used across `regenie/splitl0`, every matching `regenie/runl0` job, and `regenie/runl1`. +authors: + - "@lyh970817" +maintainers: + - "@lyh970817" +containers: + conda: + linux_amd64: + lock_file: "modules/nf-core/regenie/splitl0/.conda-lock/linux_amd64-bd-5d361f9fcb2f85cf_1.txt" + docker: + linux_amd64: + build_id: "bd-5d361f9fcb2f85cf_1" + name: "community.wave.seqera.io/library/regenie:4.1.2--5d361f9fcb2f85cf" + scanId: "sc-cc9eb5ed5eb381dd_2" + singularity: + linux_amd64: + build_id: "bd-7c121fb4ecd57890_1" + name: "oras://community.wave.seqera.io/library/regenie:4.1.2--7c121fb4ecd57890" + https: "https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/7a/7a05bf71ea09adc5ebf9f0c656c9b326c0f16ba8e4966914972e58313469a466/data" diff --git a/modules/nf-core/regenie/splitl0/tests/main.nf.test b/modules/nf-core/regenie/splitl0/tests/main.nf.test new file mode 100644 index 000000000000..3932aca10356 --- /dev/null +++ b/modules/nf-core/regenie/splitl0/tests/main.nf.test @@ -0,0 +1,148 @@ +nextflow_process { + + name "Test Process REGENIE_SPLITL0" + config "./nextflow.config" + script "../main.nf" + process "REGENIE_SPLITL0" + + tag "modules" + tag "modules_nfcore" + tag "regenie" + tag "regenie/splitl0" + + test("homo_sapiens popgen - quantitative plink1 with covariates") { + + when { + params { + module_args = '--phenoColList QuantitativeTrait' + } + process { + """ + input[0] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + ] + + input[1] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true) + ] + + input[2] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_covariates.txt', checkIfExists: true) + ] + + input[3] = 100 + input[4] = 2 + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.master.size() == 1 }, + { assert process.out.snplists.size() == 1 }, + { assert process.out.log.size() == 1 }, + { assert process.out.master.get(0).get(0).id == 'plink_simulated' }, + { assert process.out.snplists.get(0).get(0).id == 'plink_simulated' }, + { assert process.out.log.get(0).get(0).id == 'plink_simulated' }, + { + def master = path(process.out.master.get(0).get(1)) + def lines = master.text.readLines().findAll { it } + assert master.exists() + assert lines.size() == 3 + assert lines[0] ==~ /\d+\s+\d+/ + assert lines.drop(1).every { line -> + line.contains('plink_simulated_job') && !line.contains('/') + } + }, + { + def snplists = process.out.snplists.get(0).get(1) + assert snplists.size() == 2 + assert snplists.collect { path(it).getFileName().toString() }.sort() == [ + 'plink_simulated_job1.snplist', + 'plink_simulated_job2.snplist' + ] + }, + { assert path(process.out.log.get(0).get(1)).exists() }, + { + def stableMaster = process.out.master.collect { master -> + [master[0], path(master[1]).getFileName().toString()] + } + def stableSnplists = process.out.snplists.collect { snplist -> + [snplist[0], snplist[1].collect { path(it).getFileName().toString() }.sort()] + } + assert snapshot( + stableMaster, + stableSnplists, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() + } + ) + } + + } + + test("homo_sapiens popgen - plink1 - stub") { + + options "-stub" + + when { + params { + module_args = '--phenoColList QuantitativeTrait' + } + process { + """ + input[0] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + ] + + input[1] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true) + ] + + input[2] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_covariates.txt', checkIfExists: true) + ] + + input[3] = 100 + input[4] = 2 + """ + } + } + + then { + assertAll( + { assert process.success }, + { + def stableMaster = process.out.master.collect { master -> + [master[0], path(master[1]).getFileName().toString()] + } + def stableSnplists = process.out.snplists.collect { snplist -> + [snplist[0], snplist[1].collect { path(it).getFileName().toString() }.sort()] + } + def stableLogs = process.out.log.collect { log -> + [log[0], path(log[1]).getFileName().toString()] + } + assert snapshot( + stableMaster, + stableSnplists, + stableLogs, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() + } + ) + } + + } + +} diff --git a/modules/nf-core/regenie/splitl0/tests/main.nf.test.snap b/modules/nf-core/regenie/splitl0/tests/main.nf.test.snap new file mode 100644 index 000000000000..a89bab88ca8f --- /dev/null +++ b/modules/nf-core/regenie/splitl0/tests/main.nf.test.snap @@ -0,0 +1,84 @@ +{ + "homo_sapiens popgen - quantitative plink1 with covariates": { + "content": [ + [ + [ + { + "id": "plink_simulated" + }, + "plink_simulated.master" + ] + ], + [ + [ + { + "id": "plink_simulated" + }, + [ + "plink_simulated_job1.snplist", + "plink_simulated_job2.snplist" + ] + ] + ], + { + "versions_regenie": [ + [ + "REGENIE_SPLITL0", + "regenie", + "4.1.2" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-05-21T22:23:00.198898381" + }, + "homo_sapiens popgen - plink1 - stub": { + "content": [ + [ + [ + { + "id": "plink_simulated" + }, + "plink_simulated.master" + ] + ], + [ + [ + { + "id": "plink_simulated" + }, + [ + "plink_simulated_job1.snplist", + "plink_simulated_job2.snplist" + ] + ] + ], + [ + [ + { + "id": "plink_simulated" + }, + "plink_simulated.log" + ] + ], + { + "versions_regenie": [ + [ + "REGENIE_SPLITL0", + "regenie", + "4.1.2" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-05-21T22:23:14.857699924" + } +} \ No newline at end of file diff --git a/modules/nf-core/regenie/splitl0/tests/nextflow.config b/modules/nf-core/regenie/splitl0/tests/nextflow.config new file mode 100644 index 000000000000..a21fcfdd4745 --- /dev/null +++ b/modules/nf-core/regenie/splitl0/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: REGENIE_SPLITL0 { + ext.args = params.module_args + } +} From 6861c91817513feb19f989d59c9382fe0edb2b33 Mon Sep 17 00:00:00 2001 From: lyh970817 Date: Thu, 28 May 2026 18:15:17 +0800 Subject: [PATCH 02/13] Update REGENIE splitl0 metadata --- modules/nf-core/regenie/splitl0/meta.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/modules/nf-core/regenie/splitl0/meta.yml b/modules/nf-core/regenie/splitl0/meta.yml index 7a2de6f6cce6..2590342868a3 100644 --- a/modules/nf-core/regenie/splitl0/meta.yml +++ b/modules/nf-core/regenie/splitl0/meta.yml @@ -4,6 +4,7 @@ description: Split REGENIE step 1 level-0 ridge-regression blocks into parallel keywords: - regenie - gwas + - genome-wide association study - association - genomics - parallel @@ -141,9 +142,6 @@ authors: maintainers: - "@lyh970817" containers: - conda: - linux_amd64: - lock_file: "modules/nf-core/regenie/splitl0/.conda-lock/linux_amd64-bd-5d361f9fcb2f85cf_1.txt" docker: linux_amd64: build_id: "bd-5d361f9fcb2f85cf_1" From f2ee851b9341a3c49ce9a97bf480b2f95bbcf823 Mon Sep 17 00:00:00 2001 From: lyh970817 Date: Thu, 28 May 2026 18:15:17 +0800 Subject: [PATCH 03/13] Use shared test params for REGENIE splitl0 --- .../regenie/splitl0/tests/main.nf.test | 162 +++++++++++++++++- .../regenie/splitl0/tests/main.nf.test.snap | 103 ++++++++++- .../regenie/splitl0/tests/nextflow.config | 8 +- 3 files changed, 261 insertions(+), 12 deletions(-) diff --git a/modules/nf-core/regenie/splitl0/tests/main.nf.test b/modules/nf-core/regenie/splitl0/tests/main.nf.test index 3932aca10356..740de93fbc5a 100644 --- a/modules/nf-core/regenie/splitl0/tests/main.nf.test +++ b/modules/nf-core/regenie/splitl0/tests/main.nf.test @@ -1,9 +1,9 @@ nextflow_process { name "Test Process REGENIE_SPLITL0" - config "./nextflow.config" script "../main.nf" process "REGENIE_SPLITL0" + config "./nextflow.config" tag "modules" tag "modules_nfcore" @@ -14,7 +14,7 @@ nextflow_process { when { params { - module_args = '--phenoColList QuantitativeTrait' + module_args = "--phenoColList QuantitativeTrait" } process { """ @@ -42,6 +42,11 @@ nextflow_process { } then { + def command = path(process.out.master.get(0).get(1)).parent.resolve('.command.sh').text + .replaceAll(/\\\s*\n/, ' ') + .replaceAll(/\s+/, ' ') + .trim() + assertAll( { assert process.success }, { assert process.out.master.size() == 1 }, @@ -62,6 +67,7 @@ nextflow_process { }, { def snplists = process.out.snplists.get(0).get(1) + snplists = snplists instanceof List ? snplists : [snplists] assert snplists.size() == 2 assert snplists.collect { path(it).getFileName().toString() }.sort() == [ 'plink_simulated_job1.snplist', @@ -69,16 +75,108 @@ nextflow_process { ] }, { assert path(process.out.log.get(0).get(1)).exists() }, + { assert command.contains('--split-l0 plink_simulated,2') }, + { assert command.contains('--out plink_simulated') }, + { assert command.contains('--bed plink_simulated') }, + { assert !command.contains('--bed /') }, + { assert command.contains('--phenoColList QuantitativeTrait') }, + { assert command.contains('--covarFile plink_simulated_covariates.txt') }, + { assert command.contains('--bsize 100') }, + { + def stableMaster = process.out.master.collect { master -> + [master[0], path(master[1]).getFileName().toString()] + } + def stableSnplists = process.out.snplists.collect { snplist -> + def snplistFiles = snplist[1] instanceof List ? snplist[1] : [snplist[1]] + [snplist[0], snplistFiles.collect { path(it).getFileName().toString() }.sort()] + } + def stableLogs = process.out.log.collect { logTuple -> + [logTuple[0], path(logTuple[1]).getFileName().toString()] + } + assert snapshot( + stableMaster, + stableSnplists, + stableLogs, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() + } + ) + } + + } + + test("homo_sapiens popgen - quantitative plink2 without covariates") { + + when { + params { + module_args = "--phenoColList QuantitativeTrait" + } + process { + """ + input[0] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.pgen', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.pvar', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.psam', checkIfExists: true) + ] + + input[1] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true) + ] + + input[2] = [[:], []] + input[3] = [] + input[4] = 2 + """ + } + } + + then { + def command = path(process.out.master.get(0).get(1)).parent.resolve('.command.sh').text + .replaceAll(/\\\s*\n/, ' ') + .replaceAll(/\s+/, ' ') + .trim() + + assertAll( + { assert process.success }, + { assert process.out.master.size() == 1 }, + { assert process.out.snplists.size() == 1 }, + { assert process.out.log.size() == 1 }, + { assert process.out.master.get(0).get(0).id == 'plink_simulated' }, + { assert process.out.snplists.get(0).get(0).id == 'plink_simulated' }, + { assert process.out.log.get(0).get(0).id == 'plink_simulated' }, + { + def snplists = process.out.snplists.get(0).get(1) + snplists = snplists instanceof List ? snplists : [snplists] + assert snplists.size() >= 1 + assert snplists.collect { path(it).getFileName().toString() }.sort() == [ + 'plink_simulated_job1.snplist' + ] + }, + { assert path(process.out.log.get(0).get(1)).exists() }, + { assert command.contains('--split-l0 plink_simulated,2') }, + { assert command.contains('--out plink_simulated') }, + { assert command.contains('--pgen plink_simulated') }, + { assert !command.contains('--pgen /') }, + { assert command.contains('--phenoColList QuantitativeTrait') }, + { assert !command.contains('--covarFile') }, + { assert command.contains('--bsize 1000') }, { def stableMaster = process.out.master.collect { master -> [master[0], path(master[1]).getFileName().toString()] } def stableSnplists = process.out.snplists.collect { snplist -> - [snplist[0], snplist[1].collect { path(it).getFileName().toString() }.sort()] + def snplistFiles = snplist[1] instanceof List ? snplist[1] : [snplist[1]] + [snplist[0], snplistFiles.collect { path(it).getFileName().toString() }.sort()] + } + def stableLogs = process.out.log.collect { logTuple -> + [logTuple[0], path(logTuple[1]).getFileName().toString()] } assert snapshot( stableMaster, stableSnplists, + stableLogs, process.out.findAll { key, val -> key.startsWith('versions') } ).match() } @@ -92,9 +190,6 @@ nextflow_process { options "-stub" when { - params { - module_args = '--phenoColList QuantitativeTrait' - } process { """ input[0] = [ @@ -128,7 +223,60 @@ nextflow_process { [master[0], path(master[1]).getFileName().toString()] } def stableSnplists = process.out.snplists.collect { snplist -> - [snplist[0], snplist[1].collect { path(it).getFileName().toString() }.sort()] + def snplistFiles = snplist[1] instanceof List ? snplist[1] : [snplist[1]] + [snplist[0], snplistFiles.collect { path(it).getFileName().toString() }.sort()] + } + def stableLogs = process.out.log.collect { log -> + [log[0], path(log[1]).getFileName().toString()] + } + assert snapshot( + stableMaster, + stableSnplists, + stableLogs, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() + } + ) + } + + } + + test("homo_sapiens popgen - plink2 - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.pgen', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.pvar', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.psam', checkIfExists: true) + ] + + input[1] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true) + ] + + input[2] = [[:], []] + input[3] = [] + input[4] = 2 + """ + } + } + + then { + assertAll( + { assert process.success }, + { + def stableMaster = process.out.master.collect { master -> + [master[0], path(master[1]).getFileName().toString()] + } + def stableSnplists = process.out.snplists.collect { snplist -> + def snplistFiles = snplist[1] instanceof List ? snplist[1] : [snplist[1]] + [snplist[0], snplistFiles.collect { path(it).getFileName().toString() }.sort()] } def stableLogs = process.out.log.collect { log -> [log[0], path(log[1]).getFileName().toString()] diff --git a/modules/nf-core/regenie/splitl0/tests/main.nf.test.snap b/modules/nf-core/regenie/splitl0/tests/main.nf.test.snap index a89bab88ca8f..1afe6704f91b 100644 --- a/modules/nf-core/regenie/splitl0/tests/main.nf.test.snap +++ b/modules/nf-core/regenie/splitl0/tests/main.nf.test.snap @@ -20,6 +20,58 @@ ] ] ], + [ + [ + { + "id": "plink_simulated" + }, + "plink_simulated.log" + ] + ], + { + "versions_regenie": [ + [ + "REGENIE_SPLITL0", + "regenie", + "4.1.2" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-05-27T14:40:00" + }, + "homo_sapiens popgen - quantitative plink2 without covariates": { + "content": [ + [ + [ + { + "id": "plink_simulated" + }, + "plink_simulated.master" + ] + ], + [ + [ + { + "id": "plink_simulated" + }, + [ + "plink_simulated_job1.snplist" + ] + ] + ], + [ + [ + { + "id": "plink_simulated" + }, + "plink_simulated.log" + ] + ], { "versions_regenie": [ [ @@ -34,7 +86,7 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-05-21T22:23:00.198898381" + "timestamp": "2026-05-27T14:40:00" }, "homo_sapiens popgen - plink1 - stub": { "content": [ @@ -79,6 +131,51 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-05-21T22:23:14.857699924" + "timestamp": "2026-05-27T14:40:00" + }, + "homo_sapiens popgen - plink2 - stub": { + "content": [ + [ + [ + { + "id": "plink_simulated" + }, + "plink_simulated.master" + ] + ], + [ + [ + { + "id": "plink_simulated" + }, + [ + "plink_simulated_job1.snplist", + "plink_simulated_job2.snplist" + ] + ] + ], + [ + [ + { + "id": "plink_simulated" + }, + "plink_simulated.log" + ] + ], + { + "versions_regenie": [ + [ + "REGENIE_SPLITL0", + "regenie", + "4.1.2" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-05-27T14:40:00" } -} \ No newline at end of file +} diff --git a/modules/nf-core/regenie/splitl0/tests/nextflow.config b/modules/nf-core/regenie/splitl0/tests/nextflow.config index a21fcfdd4745..426255745bc5 100644 --- a/modules/nf-core/regenie/splitl0/tests/nextflow.config +++ b/modules/nf-core/regenie/splitl0/tests/nextflow.config @@ -1,5 +1,9 @@ +params { + module_args = "" +} + process { - withName: REGENIE_SPLITL0 { - ext.args = params.module_args + withName: "REGENIE_SPLITL0" { + ext.args = { params.module_args ?: "" } } } From d20211770637ef6ff43f5099549206de90293196 Mon Sep 17 00:00:00 2001 From: lyh970817 Date: Wed, 27 May 2026 02:39:22 +0800 Subject: [PATCH 04/13] Add REGENIE runl0 module --- modules/nf-core/regenie/runl0/environment.yml | 7 + modules/nf-core/regenie/runl0/main.nf | 55 ++++++ modules/nf-core/regenie/runl0/meta.yml | 160 +++++++++++++++++ .../nf-core/regenie/runl0/tests/main.nf.test | 168 ++++++++++++++++++ .../regenie/runl0/tests/main.nf.test.snap | 66 +++++++ .../regenie/runl0/tests/nextflow.config | 8 + 6 files changed, 464 insertions(+) create mode 100644 modules/nf-core/regenie/runl0/environment.yml create mode 100644 modules/nf-core/regenie/runl0/main.nf create mode 100644 modules/nf-core/regenie/runl0/meta.yml create mode 100644 modules/nf-core/regenie/runl0/tests/main.nf.test create mode 100644 modules/nf-core/regenie/runl0/tests/main.nf.test.snap create mode 100644 modules/nf-core/regenie/runl0/tests/nextflow.config diff --git a/modules/nf-core/regenie/runl0/environment.yml b/modules/nf-core/regenie/runl0/environment.yml new file mode 100644 index 000000000000..98fe8277cc05 --- /dev/null +++ b/modules/nf-core/regenie/runl0/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - "bioconda::regenie=4.1.2" diff --git a/modules/nf-core/regenie/runl0/main.nf b/modules/nf-core/regenie/runl0/main.nf new file mode 100644 index 000000000000..7ad218b5997d --- /dev/null +++ b/modules/nf-core/regenie/runl0/main.nf @@ -0,0 +1,55 @@ +process REGENIE_RUNL0 { + tag "${meta.id}_${job_number}" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/7a/7a05bf71ea09adc5ebf9f0c656c9b326c0f16ba8e4966914972e58313469a466/data' + : 'community.wave.seqera.io/library/regenie:4.1.2--5d361f9fcb2f85cf'}" + + input: + tuple val(meta), path(plink_genotype_file), path(plink_variant_file), path(plink_sample_file) + tuple val(meta2), path(master), path(snplist), val(job_number) + tuple val(meta3), path(pheno) + tuple val(meta4), path(covar) + val bsize + + output: + tuple val(meta), path("*_l0_Y*"), emit: l0_predictions + tuple val(meta), path("*.log"), emit: log + tuple val("${task.process}"), val('regenie'), eval('regenie --version 2>&1 | sed -n "1{s/^v//;s/\\.gz$//;p}"'), topic: versions, emit: versions_regenie + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def input_prefix = plink_genotype_file.baseName + def prefix = task.ext.prefix ?: input_prefix + def run_prefix = "${prefix}_job${job_number}" + def genotype_flag = plink_genotype_file.name.endsWith('.pgen') ? '--pgen' : '--bed' + def covar_arg = covar ? "--covarFile ${covar}" : '' + def bsize_arg = bsize ?: 1000 + """ + regenie \\ + --step 1 \\ + ${genotype_flag} ${input_prefix} \\ + --phenoFile ${pheno} \\ + ${covar_arg} \\ + --bsize ${bsize_arg} \\ + --gz \\ + --threads ${task.cpus} \\ + ${args} \\ + --out ${run_prefix} \\ + --run-l0 ${master},${job_number} + """ + + stub: + def input_prefix = plink_genotype_file.baseName + def prefix = task.ext.prefix ?: input_prefix + def run_prefix = "${prefix}_job${job_number}" + """ + touch ${run_prefix}_l0_Y1 + touch ${run_prefix}.log + """ +} diff --git a/modules/nf-core/regenie/runl0/meta.yml b/modules/nf-core/regenie/runl0/meta.yml new file mode 100644 index 000000000000..510ee02cfe8a --- /dev/null +++ b/modules/nf-core/regenie/runl0/meta.yml @@ -0,0 +1,160 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "regenie_runl0" +description: Run one REGENIE step 1 level-0 job from a split master file +keywords: + - regenie + - gwas + - association + - genomics + - parallel +tools: + - "regenie": + description: "Regenie is a C++ program for whole genome regression modelling of large genome-wide association studies (GWAS)." + homepage: "https://rgcgithub.github.io/regenie/" + documentation: "https://rgcgithub.github.io/regenie/options/" + tool_dev_url: "https://github.com/rgcgithub/regenie" + doi: "10.1038/s41588-021-00870-7" + licence: ["MIT"] + identifier: "biotools:regenie" + +input: + - - meta: + type: map + description: | + Groovy Map containing genotype information + Keep only the genotype analysis identifier in this map + REGENIE consumes the staged basename of `plink_genotype_file` as the `--bed` or `--pgen` prefix, so the `.bed/.bim/.fam` or `.pgen/.pvar/.psam` files must share one basename + e.g. `[ id:'cohort' ]` + - plink_genotype_file: + type: file + description: PLINK primary genotype file in BED or PGEN format + pattern: "*.{bed,pgen}" + ontologies: + - edam: "http://edamontology.org/format_3003" # BED + - plink_variant_file: + type: file + description: PLINK variant metadata file in BIM or PVAR format + pattern: "*.{bim,pvar,zst}" + ontologies: [] + - plink_sample_file: + type: file + description: PLINK sample metadata file in FAM or PSAM format + pattern: "*.{fam,psam}" + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing split level-0 job information + e.g. `[ id:'plink_simulated' ]` + - master: + type: file + description: REGENIE split level-0 master file from `regenie/splitl0` + pattern: "*.master" + ontologies: + - edam: "http://edamontology.org/format_2330" # Text + - snplist: + type: file + description: Per-job variant list staged because the master file references it; the path is not passed explicitly to REGENIE + pattern: "*_job*.snplist" + ontologies: + - edam: "http://edamontology.org/format_2330" # Text + - job_number: + type: integer + description: Level-0 job number passed as the second value to `--run-l0` + - - meta3: + type: map + description: | + Groovy Map containing genotype/sample information associated with the phenotype file input + Use the same phenotype file and phenotype-selection arguments for all `regenie/splitl0`, `regenie/runl0`, and `regenie/runl1` jobs in the same chunked step 1 analysis + e.g. `[ id:'plink_simulated' ]` + - pheno: + type: file + description: Phenotype file passed to `--phenoFile` + pattern: "*.{phe,pheno,txt,tsv}" + ontologies: + - edam: "http://edamontology.org/format_3475" # TSV + - - meta4: + type: map + description: | + Groovy Map containing genotype/sample information associated with the covariate input + Use compatible covariate inputs for all stages in the same chunked step 1 analysis + e.g. `[ id:'plink_simulated' ]` + - covar: + type: file + optional: true + description: Optional covariate file passed to `--covarFile`; provide `[]` when absent + pattern: "*.{covar,cov,txt,tsv}" + ontologies: + - edam: "http://edamontology.org/format_3475" # TSV + - bsize: + type: integer + description: Optional block size passed to `--bsize`; pass `[]` to use the module default of `1000` + +output: + l0_predictions: + - - meta: + type: map + description: | + Groovy Map containing genotype/sample information + e.g. `[ id:'plink_simulated' ]` + - "*_l0_Y*": + type: file + description: REGENIE level-0 prediction files for this job + pattern: "*_l0_Y*" + ontologies: [] + log: + - - meta: + type: map + description: | + Groovy Map containing genotype information + e.g. `[ id:'plink_simulated' ]` + - "*.log": + type: file + description: REGENIE run level-0 log file + pattern: "*.log" + ontologies: + - edam: "http://edamontology.org/format_2330" # Text + versions_regenie: + - - "${task.process}": + type: string + description: The process the versions were collected from + - "regenie": + type: string + description: The tool name + - 'regenie --version 2>&1 | sed -n "1{s/^v//;s/\.gz$//;p}"': + type: eval + description: The command used to generate the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - regenie: + type: string + description: The tool name + - 'regenie --version 2>&1 | sed -n "1{s/^v//;s/\.gz$//;p}"': + type: eval + description: The command used to generate the version of the tool + +notes: | + `task.ext.args` is passed directly to REGENIE and can be used for stage-consistent options such as `--phenoColList`, `--bt`, `--loocv`, or `--keep-l0`. + The same phenotype file, phenotype-selection arguments, trait mode arguments such as `--bt`, and compatible genotype/covariate inputs must be used across `regenie/splitl0`, every matching `regenie/runl0` job, and `regenie/runl1`. +authors: + - "@lyh970817" +maintainers: + - "@lyh970817" +containers: + conda: + linux_amd64: + lock_file: "modules/nf-core/regenie/runl0/.conda-lock/linux_amd64-bd-5d361f9fcb2f85cf_1.txt" + docker: + linux_amd64: + build_id: "bd-5d361f9fcb2f85cf_1" + name: "community.wave.seqera.io/library/regenie:4.1.2--5d361f9fcb2f85cf" + scanId: "sc-cc9eb5ed5eb381dd_2" + singularity: + linux_amd64: + build_id: "bd-7c121fb4ecd57890_1" + name: "oras://community.wave.seqera.io/library/regenie:4.1.2--7c121fb4ecd57890" + https: "https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/7a/7a05bf71ea09adc5ebf9f0c656c9b326c0f16ba8e4966914972e58313469a466/data" diff --git a/modules/nf-core/regenie/runl0/tests/main.nf.test b/modules/nf-core/regenie/runl0/tests/main.nf.test new file mode 100644 index 000000000000..a5d1cda3011a --- /dev/null +++ b/modules/nf-core/regenie/runl0/tests/main.nf.test @@ -0,0 +1,168 @@ +nextflow_process { + + name "Test Process REGENIE_RUNL0" + config "./nextflow.config" + script "../main.nf" + process "REGENIE_RUNL0" + + tag "modules" + tag "modules_nfcore" + tag "regenie" + tag "regenie/splitl0" + tag "regenie/runl0" + + setup { + run("REGENIE_SPLITL0") { + script "../../splitl0/main.nf" + process { + """ + input[0] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + ] + + input[1] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true) + ] + + input[2] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_covariates.txt', checkIfExists: true) + ] + + input[3] = 100 + input[4] = 2 + """ + } + } + } + + test("homo_sapiens popgen - quantitative plink1 with covariates") { + + when { + params { + module_args = '--phenoColList QuantitativeTrait' + } + process { + """ + input[0] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + ] + + input[1] = REGENIE_SPLITL0.out.master + .combine(REGENIE_SPLITL0.out.snplists) + .map { master_meta, master, snplist_meta, snplists -> + [ master_meta, master, snplists.find { snplist -> snplist.getFileName().toString().contains('_job1.snplist') }, 1 ] + } + + input[2] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true) + ] + + input[3] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_covariates.txt', checkIfExists: true) + ] + + input[4] = 100 + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.l0_predictions.size() == 1 }, + { assert process.out.log.size() == 1 }, + { assert process.out.l0_predictions.get(0).get(0).id == 'plink_simulated' }, + { assert process.out.log.get(0).get(0).id == 'plink_simulated' }, + { + def predictionFiles = process.out.l0_predictions.get(0).get(1) + predictionFiles = predictionFiles instanceof List ? predictionFiles : [predictionFiles] + assert predictionFiles.size() >= 1 + assert predictionFiles.every { path(it).getFileName().toString().contains('_l0_Y') } + }, + { assert path(process.out.log.get(0).get(1)).exists() }, + { + def stablePredictions = process.out.l0_predictions.collect { prediction -> + def predictionFiles = prediction[1] instanceof List ? prediction[1] : [prediction[1]] + [prediction[0], predictionFiles.collect { path(it).getFileName().toString() }.sort()] + } + assert snapshot( + stablePredictions, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() + } + ) + } + + } + + test("homo_sapiens popgen - plink1 - stub") { + + options "-stub" + + when { + params { + module_args = '--phenoColList QuantitativeTrait' + } + process { + """ + input[0] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + ] + + input[1] = REGENIE_SPLITL0.out.master + .combine(REGENIE_SPLITL0.out.snplists) + .map { master_meta, master, snplist_meta, snplists -> + [ master_meta, master, snplists.find { snplist -> snplist.getFileName().toString().contains('_job1.snplist') }, 1 ] + } + + input[2] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true) + ] + + input[3] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_covariates.txt', checkIfExists: true) + ] + + input[4] = 100 + """ + } + } + + then { + assertAll( + { assert process.success }, + { + def stablePredictions = process.out.l0_predictions.collect { prediction -> + def predictionFiles = prediction[1] instanceof List ? prediction[1] : [prediction[1]] + [prediction[0], predictionFiles.collect { path(it).getFileName().toString() }.sort()] + } + def stableLogs = process.out.log.collect { log -> + [log[0], path(log[1]).getFileName().toString()] + } + assert snapshot( + stablePredictions, + stableLogs, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() + } + ) + } + + } + +} diff --git a/modules/nf-core/regenie/runl0/tests/main.nf.test.snap b/modules/nf-core/regenie/runl0/tests/main.nf.test.snap new file mode 100644 index 000000000000..66bd3fc8bbba --- /dev/null +++ b/modules/nf-core/regenie/runl0/tests/main.nf.test.snap @@ -0,0 +1,66 @@ +{ + "homo_sapiens popgen - quantitative plink1 with covariates": { + "content": [ + [ + [ + { + "id": "plink_simulated" + }, + [ + "plink_simulated_job1_l0_Y1" + ] + ] + ], + { + "versions_regenie": [ + [ + "REGENIE_RUNL0", + "regenie", + "4.1.2" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-05-21T22:35:29.736647195" + }, + "homo_sapiens popgen - plink1 - stub": { + "content": [ + [ + [ + { + "id": "plink_simulated" + }, + [ + "plink_simulated_job1_l0_Y1" + ] + ] + ], + [ + [ + { + "id": "plink_simulated" + }, + "plink_simulated_job1.log" + ] + ], + { + "versions_regenie": [ + [ + "REGENIE_RUNL0", + "regenie", + "4.1.2" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-05-21T22:32:15.576585394" + } +} \ No newline at end of file diff --git a/modules/nf-core/regenie/runl0/tests/nextflow.config b/modules/nf-core/regenie/runl0/tests/nextflow.config new file mode 100644 index 000000000000..5334b9809f5e --- /dev/null +++ b/modules/nf-core/regenie/runl0/tests/nextflow.config @@ -0,0 +1,8 @@ +process { + withName: REGENIE_SPLITL0 { + ext.args = params.module_args + } + withName: REGENIE_RUNL0 { + ext.args = params.module_args + } +} From 04c81a5cbd5f81576161ad7db0a2a92d7486fb21 Mon Sep 17 00:00:00 2001 From: lyh970817 Date: Thu, 28 May 2026 18:15:17 +0800 Subject: [PATCH 05/13] Update REGENIE runl0 metadata --- modules/nf-core/regenie/runl0/meta.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/modules/nf-core/regenie/runl0/meta.yml b/modules/nf-core/regenie/runl0/meta.yml index 510ee02cfe8a..afc2ca038953 100644 --- a/modules/nf-core/regenie/runl0/meta.yml +++ b/modules/nf-core/regenie/runl0/meta.yml @@ -4,6 +4,7 @@ description: Run one REGENIE step 1 level-0 job from a split master file keywords: - regenie - gwas + - genome-wide association study - association - genomics - parallel @@ -145,9 +146,6 @@ authors: maintainers: - "@lyh970817" containers: - conda: - linux_amd64: - lock_file: "modules/nf-core/regenie/runl0/.conda-lock/linux_amd64-bd-5d361f9fcb2f85cf_1.txt" docker: linux_amd64: build_id: "bd-5d361f9fcb2f85cf_1" From c49c9876e4fa30b86ad2d82f46c84c21d91ebaff Mon Sep 17 00:00:00 2001 From: lyh970817 Date: Thu, 28 May 2026 18:15:17 +0800 Subject: [PATCH 06/13] Use shared test params for REGENIE runl0 --- .../nf-core/regenie/runl0/tests/main.nf.test | 147 ++++++++++++++++-- .../regenie/runl0/tests/main.nf.test.snap | 98 ++++++++---- .../regenie/runl0/tests/nextflow.config | 13 +- 3 files changed, 211 insertions(+), 47 deletions(-) diff --git a/modules/nf-core/regenie/runl0/tests/main.nf.test b/modules/nf-core/regenie/runl0/tests/main.nf.test index a5d1cda3011a..bc20ff355e96 100644 --- a/modules/nf-core/regenie/runl0/tests/main.nf.test +++ b/modules/nf-core/regenie/runl0/tests/main.nf.test @@ -1,9 +1,9 @@ nextflow_process { name "Test Process REGENIE_RUNL0" - config "./nextflow.config" script "../main.nf" process "REGENIE_RUNL0" + config "./nextflow.config" tag "modules" tag "modules_nfcore" @@ -38,13 +38,37 @@ nextflow_process { """ } } + + run("REGENIE_SPLITL0", alias: "REGENIE_SPLITL0_PLINK2") { + script "../../splitl0/main.nf" + process { + """ + input[0] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.pgen', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.pvar', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.psam', checkIfExists: true) + ] + + input[1] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true) + ] + + input[2] = [[:], []] + input[3] = [] + input[4] = 2 + """ + } + } + } test("homo_sapiens popgen - quantitative plink1 with covariates") { when { params { - module_args = '--phenoColList QuantitativeTrait' + module_args = "--phenoColList QuantitativeTrait" } process { """ @@ -58,7 +82,7 @@ nextflow_process { input[1] = REGENIE_SPLITL0.out.master .combine(REGENIE_SPLITL0.out.snplists) .map { master_meta, master, snplist_meta, snplists -> - [ master_meta, master, snplists.find { snplist -> snplist.getFileName().toString().contains('_job1.snplist') }, 1 ] + [ master_meta, master, snplists, 1 ] } input[2] = [ @@ -90,15 +114,31 @@ nextflow_process { assert predictionFiles.every { path(it).getFileName().toString().contains('_l0_Y') } }, { assert path(process.out.log.get(0).get(1)).exists() }, + { + def predictionFiles = process.out.l0_predictions.get(0).get(1) + predictionFiles = predictionFiles instanceof List ? predictionFiles : [predictionFiles] + def command = path(predictionFiles[0]).parent.resolve('.command.sh').text.replaceAll(/\s+/, ' ') + assert command.contains('--run-l0 plink_simulated.master,1') + assert command.contains('--out plink_simulated_job1') + assert command.contains('--bed plink_simulated') + assert !command.contains('--pgen plink_simulated') + assert command.contains('--phenoColList QuantitativeTrait') + assert command.contains('--covarFile plink_simulated_covariates.txt') + assert command.contains('--bsize 100') + }, { def stablePredictions = process.out.l0_predictions.collect { prediction -> def predictionFiles = prediction[1] instanceof List ? prediction[1] : [prediction[1]] [prediction[0], predictionFiles.collect { path(it).getFileName().toString() }.sort()] } - assert snapshot( - stablePredictions, - process.out.findAll { key, val -> key.startsWith('versions') } - ).match() + def stableLogs = process.out.log.collect { logTuple -> + [logTuple[0], path(logTuple[1]).getFileName().toString()] + } + assert snapshot([ + l0_predictions: stablePredictions, + log: stableLogs, + versions_regenie: process.out.versions_regenie + ]).match() } ) } @@ -110,9 +150,6 @@ nextflow_process { options "-stub" when { - params { - module_args = '--phenoColList QuantitativeTrait' - } process { """ input[0] = [ @@ -125,7 +162,7 @@ nextflow_process { input[1] = REGENIE_SPLITL0.out.master .combine(REGENIE_SPLITL0.out.snplists) .map { master_meta, master, snplist_meta, snplists -> - [ master_meta, master, snplists.find { snplist -> snplist.getFileName().toString().contains('_job1.snplist') }, 1 ] + [ master_meta, master, snplists, 1 ] } input[2] = [ @@ -154,11 +191,89 @@ nextflow_process { def stableLogs = process.out.log.collect { log -> [log[0], path(log[1]).getFileName().toString()] } - assert snapshot( - stablePredictions, - stableLogs, - process.out.findAll { key, val -> key.startsWith('versions') } - ).match() + assert snapshot([ + l0_predictions: stablePredictions, + log: stableLogs, + versions_regenie: process.out.versions_regenie + ]).match() + } + ) + } + + } + + test("homo_sapiens popgen - quantitative plink2 without covariates and default bsize") { + + when { + params { + module_args = "--phenoColList QuantitativeTrait" + module_prefix = "plink_simulated_plink2" + } + process { + """ + input[0] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.pgen', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.pvar', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.psam', checkIfExists: true) + ] + + input[1] = REGENIE_SPLITL0_PLINK2.out.master + .combine(REGENIE_SPLITL0_PLINK2.out.snplists) + .map { master_meta, master, snplist_meta, snplists -> + [ master_meta, master, snplists, 1 ] + } + + input[2] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true) + ] + + input[3] = [[:], []] + input[4] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.l0_predictions.size() == 1 }, + { assert process.out.log.size() == 1 }, + { assert process.out.l0_predictions.get(0).get(0).id == 'plink_simulated' }, + { assert process.out.log.get(0).get(0).id == 'plink_simulated' }, + { + def predictionFiles = process.out.l0_predictions.get(0).get(1) + predictionFiles = predictionFiles instanceof List ? predictionFiles : [predictionFiles] + assert predictionFiles.size() >= 1 + assert predictionFiles.every { path(it).getFileName().toString().contains('_l0_Y') } + }, + { assert path(process.out.log.get(0).get(1)).exists() }, + { + def predictionFiles = process.out.l0_predictions.get(0).get(1) + predictionFiles = predictionFiles instanceof List ? predictionFiles : [predictionFiles] + def command = path(predictionFiles[0]).parent.resolve('.command.sh').text.replaceAll(/\s+/, ' ') + assert command.contains('--run-l0 plink_simulated_plink2.master,1') + assert command.contains('--out plink_simulated_plink2_job1') + assert command.contains('--pgen plink_simulated') + assert !command.contains('--bed plink_simulated') + assert command.contains('--phenoColList QuantitativeTrait') + assert !command.contains('--covarFile') + assert command.contains('--bsize 1000') + }, + { + def stablePredictions = process.out.l0_predictions.collect { prediction -> + def predictionFiles = prediction[1] instanceof List ? prediction[1] : [prediction[1]] + [prediction[0], predictionFiles.collect { path(it).getFileName().toString() }.sort()] + } + def stableLogs = process.out.log.collect { log -> + [log[0], path(log[1]).getFileName().toString()] + } + assert snapshot([ + l0_predictions: stablePredictions, + log: stableLogs, + versions_regenie: process.out.versions_regenie + ]).match() } ) } diff --git a/modules/nf-core/regenie/runl0/tests/main.nf.test.snap b/modules/nf-core/regenie/runl0/tests/main.nf.test.snap index 66bd3fc8bbba..8369547a0ef9 100644 --- a/modules/nf-core/regenie/runl0/tests/main.nf.test.snap +++ b/modules/nf-core/regenie/runl0/tests/main.nf.test.snap @@ -1,17 +1,61 @@ { "homo_sapiens popgen - quantitative plink1 with covariates": { "content": [ - [ - [ - { - "id": "plink_simulated" - }, + { + "l0_predictions": [ + [ + { + "id": "plink_simulated" + }, + [ + "plink_simulated_job1_l0_Y1" + ] + ] + ], + "log": [ [ - "plink_simulated_job1_l0_Y1" + { + "id": "plink_simulated" + }, + "plink_simulated_job1.log" + ] + ], + "versions_regenie": [ + [ + "REGENIE_RUNL0", + "regenie", + "4.1.2" ] ] - ], + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-05-27T19:26:56.972614171" + }, + "homo_sapiens popgen - quantitative plink2 without covariates and default bsize": { + "content": [ { + "l0_predictions": [ + [ + { + "id": "plink_simulated" + }, + [ + "plink_simulated_plink2_job1_l0_Y1" + ] + ] + ], + "log": [ + [ + { + "id": "plink_simulated" + }, + "plink_simulated_plink2_job1.log" + ] + ], "versions_regenie": [ [ "REGENIE_RUNL0", @@ -25,29 +69,29 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-05-21T22:35:29.736647195" + "timestamp": "2026-05-27T19:38:02.902905979" }, "homo_sapiens popgen - plink1 - stub": { "content": [ - [ - [ - { - "id": "plink_simulated" - }, + { + "l0_predictions": [ [ - "plink_simulated_job1_l0_Y1" + { + "id": "plink_simulated" + }, + [ + "plink_simulated_job1_l0_Y1" + ] ] - ] - ], - [ - [ - { - "id": "plink_simulated" - }, - "plink_simulated_job1.log" - ] - ], - { + ], + "log": [ + [ + { + "id": "plink_simulated" + }, + "plink_simulated_job1.log" + ] + ], "versions_regenie": [ [ "REGENIE_RUNL0", @@ -61,6 +105,6 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-05-21T22:32:15.576585394" + "timestamp": "2026-05-27T14:43:11.65601525" } -} \ No newline at end of file +} diff --git a/modules/nf-core/regenie/runl0/tests/nextflow.config b/modules/nf-core/regenie/runl0/tests/nextflow.config index 5334b9809f5e..669cbe2db74d 100644 --- a/modules/nf-core/regenie/runl0/tests/nextflow.config +++ b/modules/nf-core/regenie/runl0/tests/nextflow.config @@ -1,8 +1,13 @@ +params { + module_args = "" + module_prefix = null +} + process { - withName: REGENIE_SPLITL0 { - ext.args = params.module_args + withName: "REGENIE_SPLITL0|REGENIE_RUNL0|REGENIE_SPLITL0_PLINK2" { + ext.args = { params.module_args ?: "" } } - withName: REGENIE_RUNL0 { - ext.args = params.module_args + withName: "REGENIE_SPLITL0_PLINK2|REGENIE_RUNL0" { + ext.prefix = { params.module_prefix } } } From 71389f70fc2ab81d9384fa7cee3c55eb69970ee6 Mon Sep 17 00:00:00 2001 From: lyh970817 Date: Wed, 27 May 2026 02:40:28 +0800 Subject: [PATCH 07/13] Add REGENIE runl1 module --- modules/nf-core/regenie/runl1/environment.yml | 7 + modules/nf-core/regenie/runl1/main.nf | 58 +++++ modules/nf-core/regenie/runl1/meta.yml | 174 +++++++++++++ .../nf-core/regenie/runl1/tests/main.nf.test | 243 ++++++++++++++++++ .../regenie/runl1/tests/main.nf.test.snap | 78 ++++++ .../regenie/runl1/tests/nextflow.config | 11 + 6 files changed, 571 insertions(+) create mode 100644 modules/nf-core/regenie/runl1/environment.yml create mode 100644 modules/nf-core/regenie/runl1/main.nf create mode 100644 modules/nf-core/regenie/runl1/meta.yml create mode 100644 modules/nf-core/regenie/runl1/tests/main.nf.test create mode 100644 modules/nf-core/regenie/runl1/tests/main.nf.test.snap create mode 100644 modules/nf-core/regenie/runl1/tests/nextflow.config diff --git a/modules/nf-core/regenie/runl1/environment.yml b/modules/nf-core/regenie/runl1/environment.yml new file mode 100644 index 000000000000..98fe8277cc05 --- /dev/null +++ b/modules/nf-core/regenie/runl1/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - "bioconda::regenie=4.1.2" diff --git a/modules/nf-core/regenie/runl1/main.nf b/modules/nf-core/regenie/runl1/main.nf new file mode 100644 index 000000000000..beba8d8deb7f --- /dev/null +++ b/modules/nf-core/regenie/runl1/main.nf @@ -0,0 +1,58 @@ +process REGENIE_RUNL1 { + tag "${meta.id}" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/7a/7a05bf71ea09adc5ebf9f0c656c9b326c0f16ba8e4966914972e58313469a466/data' + : 'community.wave.seqera.io/library/regenie:4.1.2--5d361f9fcb2f85cf'}" + + input: + tuple val(meta), path(plink_genotype_file), path(plink_variant_file), path(plink_sample_file) + tuple val(meta2), path(master), path(snplists), path(l0_predictions) + tuple val(meta3), path(pheno) + tuple val(meta4), path(covar) + val bsize + + output: + tuple val(meta), path("*_pred.list"), emit: predictions + tuple val(meta), path("*.loco.gz"), emit: loco + tuple val(meta), path("*.log"), emit: log + tuple val("${task.process}"), val('regenie'), eval('regenie --version 2>&1 | sed -n "1{s/^v//;s/\\.gz$//;p}"'), topic: versions, emit: versions_regenie + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def input_prefix = plink_genotype_file.baseName + def prefix = task.ext.prefix ?: input_prefix + def genotype_flag = plink_genotype_file.name.endsWith('.pgen') ? '--pgen' : '--bed' + def covar_arg = covar ? "--covarFile ${covar}" : '' + def bsize_arg = bsize ?: 1000 + """ + regenie \\ + --step 1 \\ + ${genotype_flag} ${input_prefix} \\ + --phenoFile ${pheno} \\ + ${covar_arg} \\ + --bsize ${bsize_arg} \\ + --gz \\ + --threads ${task.cpus} \\ + ${args} \\ + --out ${prefix} \\ + --run-l1 ${master} + """ + + stub: + def args = task.ext.args ?: '' + def input_prefix = plink_genotype_file.baseName + def prefix = task.ext.prefix ?: input_prefix + def pheno_match = args =~ /--phenoColList\s+(\S+)/ + def pheno_name = pheno_match.find() ? pheno_match.group(1) : 'Y1' + """ + echo "${pheno_name} ${prefix}_1.loco.gz" > ${prefix}_pred.list + echo "" | gzip > ${prefix}_1.loco.gz + touch ${prefix}.log + """ +} diff --git a/modules/nf-core/regenie/runl1/meta.yml b/modules/nf-core/regenie/runl1/meta.yml new file mode 100644 index 000000000000..ca880ef22f56 --- /dev/null +++ b/modules/nf-core/regenie/runl1/meta.yml @@ -0,0 +1,174 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "regenie_runl1" +description: Finish REGENIE step 1 from split level-0 prediction files +keywords: + - regenie + - gwas + - association + - genomics + - parallel +tools: + - "regenie": + description: "Regenie is a C++ program for whole genome regression modelling of large genome-wide association studies (GWAS)." + homepage: "https://rgcgithub.github.io/regenie/" + documentation: "https://rgcgithub.github.io/regenie/options/" + tool_dev_url: "https://github.com/rgcgithub/regenie" + doi: "10.1038/s41588-021-00870-7" + licence: ["MIT"] + identifier: "biotools:regenie" + +input: + - - meta: + type: map + description: | + Groovy Map containing genotype information + Keep only the genotype analysis identifier in this map + REGENIE consumes the staged basename of `plink_genotype_file` as the `--bed` or `--pgen` prefix, so the `.bed/.bim/.fam` or `.pgen/.pvar/.psam` files must share one basename + e.g. `[ id:'cohort' ]` + - plink_genotype_file: + type: file + description: PLINK primary genotype file in BED or PGEN format + pattern: "*.{bed,pgen}" + ontologies: + - edam: "http://edamontology.org/format_3003" # BED + - plink_variant_file: + type: file + description: PLINK variant metadata file in BIM or PVAR format + pattern: "*.{bim,pvar,zst}" + ontologies: [] + - plink_sample_file: + type: file + description: PLINK sample metadata file in FAM or PSAM format + pattern: "*.{fam,psam}" + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing split level-0 output information + e.g. `[ id:'plink_simulated' ]` + - master: + type: file + description: REGENIE split level-0 master file from `regenie/splitl0` + pattern: "*.master" + ontologies: + - edam: "http://edamontology.org/format_2330" # Text + - snplists: + type: file + description: All per-job variant list files referenced by the master file, staged so `--run-l1` can resolve them + pattern: "*_job*.snplist" + ontologies: + - edam: "http://edamontology.org/format_2330" # Text + - l0_predictions: + type: file + description: All level-0 prediction files referenced by the master file, staged so `--run-l1` can resolve them + pattern: "*_l0_Y*" + ontologies: [] + - - meta3: + type: map + description: | + Groovy Map containing genotype/sample information associated with the phenotype file input + Use the same phenotype file and phenotype-selection arguments for all `regenie/splitl0`, `regenie/runl0`, and `regenie/runl1` jobs in the same chunked step 1 analysis + e.g. `[ id:'plink_simulated' ]` + - pheno: + type: file + description: Phenotype file passed to `--phenoFile` + pattern: "*.{phe,pheno,txt,tsv}" + ontologies: + - edam: "http://edamontology.org/format_3475" # TSV + - - meta4: + type: map + description: | + Groovy Map containing genotype/sample information associated with the covariate input + Use compatible covariate inputs for all stages in the same chunked step 1 analysis + e.g. `[ id:'plink_simulated' ]` + - covar: + type: file + optional: true + description: Optional covariate file passed to `--covarFile`; provide `[]` when absent + pattern: "*.{covar,cov,txt,tsv}" + ontologies: + - edam: "http://edamontology.org/format_3475" # TSV + - bsize: + type: integer + description: Optional block size passed to `--bsize`; pass `[]` to use the module default of `1000` + +output: + predictions: + - - meta: + type: map + description: | + Groovy Map containing genotype/sample information + e.g. `[ id:'plink_simulated' ]` + - "*_pred.list": + type: file + description: REGENIE prediction list file + pattern: "*_pred.list" + ontologies: [] + loco: + - - meta: + type: map + description: | + Groovy Map containing genotype/sample information + e.g. `[ id:'plink_simulated' ]` + - "*.loco.gz": + type: file + description: REGENIE LOCO prediction files + pattern: "*.loco.gz" + ontologies: + - edam: "http://edamontology.org/format_3987" # GZIP + log: + - - meta: + type: map + description: | + Groovy Map containing genotype information + e.g. `[ id:'plink_simulated' ]` + - "*.log": + type: file + description: REGENIE run level-1 log file + pattern: "*.log" + ontologies: + - edam: "http://edamontology.org/format_2330" # Text + versions_regenie: + - - "${task.process}": + type: string + description: The process the versions were collected from + - "regenie": + type: string + description: The tool name + - 'regenie --version 2>&1 | sed -n "1{s/^v//;s/\.gz$//;p}"': + type: eval + description: The command used to generate the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - regenie: + type: string + description: The tool name + - 'regenie --version 2>&1 | sed -n "1{s/^v//;s/\.gz$//;p}"': + type: eval + description: The command used to generate the version of the tool + +notes: | + `task.ext.args` is passed directly to REGENIE and can be used for stage-consistent options such as `--phenoColList`, `--bt`, `--loocv`, `--keep-l0`, or `--l1-phenoList`. + The same phenotype file, phenotype-selection arguments, trait mode arguments such as `--bt`, and compatible genotype/covariate inputs must be used across `regenie/splitl0`, every matching `regenie/runl0` job, and `regenie/runl1`. +authors: + - "@lyh970817" +maintainers: + - "@lyh970817" +containers: + conda: + linux_amd64: + lock_file: "modules/nf-core/regenie/runl1/.conda-lock/linux_amd64-bd-5d361f9fcb2f85cf_1.txt" + docker: + linux_amd64: + build_id: "bd-5d361f9fcb2f85cf_1" + name: "community.wave.seqera.io/library/regenie:4.1.2--5d361f9fcb2f85cf" + scanId: "sc-cc9eb5ed5eb381dd_2" + singularity: + linux_amd64: + build_id: "bd-7c121fb4ecd57890_1" + name: "oras://community.wave.seqera.io/library/regenie:4.1.2--7c121fb4ecd57890" + https: "https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/7a/7a05bf71ea09adc5ebf9f0c656c9b326c0f16ba8e4966914972e58313469a466/data" diff --git a/modules/nf-core/regenie/runl1/tests/main.nf.test b/modules/nf-core/regenie/runl1/tests/main.nf.test new file mode 100644 index 000000000000..12a4a947cfbb --- /dev/null +++ b/modules/nf-core/regenie/runl1/tests/main.nf.test @@ -0,0 +1,243 @@ +nextflow_process { + + name "Test Process REGENIE_RUNL1" + config "./nextflow.config" + script "../main.nf" + process "REGENIE_RUNL1" + + tag "modules" + tag "modules_nfcore" + tag "regenie" + tag "regenie/splitl0" + tag "regenie/runl0" + tag "regenie/runl1" + + setup { + run("REGENIE_SPLITL0") { + script "../../splitl0/main.nf" + process { + """ + input[0] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + ] + + input[1] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true) + ] + + input[2] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_covariates.txt', checkIfExists: true) + ] + + input[3] = 100 + input[4] = 2 + """ + } + } + + run("REGENIE_RUNL0") { + script "../../runl0/main.nf" + process { + """ + input[0] = Channel.of( + [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + ], + [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + ] + ) + + input[1] = REGENIE_SPLITL0.out.master + .combine(REGENIE_SPLITL0.out.snplists) + .flatMap { master_meta, master, snplist_meta, snplists -> + (1..2).collect { job -> + [ + master_meta, + master, + snplists.find { snplist -> snplist.getFileName().toString().contains('_job' + job + '.snplist') }, + job + ] + } + } + + input[2] = Channel.of( + [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true) + ], + [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true) + ] + ) + + input[3] = Channel.of( + [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_covariates.txt', checkIfExists: true) + ], + [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_covariates.txt', checkIfExists: true) + ] + ) + + input[4] = Channel.of(100, 100) + """ + } + } + } + + test("homo_sapiens popgen - quantitative plink1 with covariates") { + + when { + params { + module_args = '--phenoColList QuantitativeTrait' + } + process { + """ + input[0] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + ] + + input[1] = REGENIE_SPLITL0.out.master + .combine(REGENIE_SPLITL0.out.snplists) + .combine(REGENIE_RUNL0.out.l0_predictions.map { it[1] }.collect()) + .map { values -> + [ values[0], values[1], values[3], values.drop(4).flatten() ] + } + + input[2] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true) + ] + + input[3] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_covariates.txt', checkIfExists: true) + ] + + input[4] = 100 + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.predictions.size() == 1 }, + { assert process.out.loco.size() == 1 }, + { assert process.out.log.size() == 1 }, + { assert process.out.predictions.get(0).get(0).id == 'plink_simulated' }, + { assert process.out.loco.get(0).get(0).id == 'plink_simulated' }, + { assert process.out.log.get(0).get(0).id == 'plink_simulated' }, + { assert path(process.out.log.get(0).get(1)).exists() }, + { + def predList = path(process.out.predictions.get(0).get(1)) + def locoFile = path(process.out.loco.get(0).get(1)) + def predListLines = predList.text.readLines().findAll { it } + assert predList.exists() + assert predListLines.size() == 1 + def predListFields = predListLines[0].split(/\s+/) + assert predListFields.size() == 2 + assert predListFields[0] == 'QuantitativeTrait' + assert predListFields[1] == locoFile.toString() + }, + { + def stablePredictions = process.out.predictions.collect { prediction -> + [prediction[0], path(prediction[1]).getFileName().toString()] + } + def stableLoco = process.out.loco.collect { loco -> + [loco[0], path(loco[1]).getFileName().toString()] + } + assert snapshot( + stablePredictions, + stableLoco, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() + } + ) + } + + } + + test("homo_sapiens popgen - plink1 - stub") { + + options "-stub" + + when { + params { + module_args = '--phenoColList QuantitativeTrait' + } + process { + """ + input[0] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + ] + + input[1] = REGENIE_SPLITL0.out.master + .combine(REGENIE_SPLITL0.out.snplists) + .combine(REGENIE_RUNL0.out.l0_predictions.map { it[1] }.collect()) + .map { values -> + [ values[0], values[1], values[3], values.drop(4).flatten() ] + } + + input[2] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true) + ] + + input[3] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_covariates.txt', checkIfExists: true) + ] + + input[4] = 100 + """ + } + } + + then { + assertAll( + { assert process.success }, + { + def stablePredictions = process.out.predictions.collect { prediction -> + [prediction[0], path(prediction[1]).getFileName().toString()] + } + def stableLoco = process.out.loco.collect { loco -> + [loco[0], path(loco[1]).getFileName().toString()] + } + def stableLogs = process.out.log.collect { log -> + [log[0], path(log[1]).getFileName().toString()] + } + assert snapshot( + stablePredictions, + stableLoco, + stableLogs, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() + } + ) + } + + } + +} diff --git a/modules/nf-core/regenie/runl1/tests/main.nf.test.snap b/modules/nf-core/regenie/runl1/tests/main.nf.test.snap new file mode 100644 index 000000000000..96094031b4b5 --- /dev/null +++ b/modules/nf-core/regenie/runl1/tests/main.nf.test.snap @@ -0,0 +1,78 @@ +{ + "homo_sapiens popgen - quantitative plink1 with covariates": { + "content": [ + [ + [ + { + "id": "plink_simulated" + }, + "plink_simulated_pred.list" + ] + ], + [ + [ + { + "id": "plink_simulated" + }, + "plink_simulated_1.loco.gz" + ] + ], + { + "versions_regenie": [ + [ + "REGENIE_RUNL1", + "regenie", + "4.1.2" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-05-21T22:36:12.351327082" + }, + "homo_sapiens popgen - plink1 - stub": { + "content": [ + [ + [ + { + "id": "plink_simulated" + }, + "plink_simulated_pred.list" + ] + ], + [ + [ + { + "id": "plink_simulated" + }, + "plink_simulated_1.loco.gz" + ] + ], + [ + [ + { + "id": "plink_simulated" + }, + "plink_simulated.log" + ] + ], + { + "versions_regenie": [ + [ + "REGENIE_RUNL1", + "regenie", + "4.1.2" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-05-21T22:37:19.177658994" + } +} \ No newline at end of file diff --git a/modules/nf-core/regenie/runl1/tests/nextflow.config b/modules/nf-core/regenie/runl1/tests/nextflow.config new file mode 100644 index 000000000000..f66bd1ce1c45 --- /dev/null +++ b/modules/nf-core/regenie/runl1/tests/nextflow.config @@ -0,0 +1,11 @@ +process { + withName: REGENIE_SPLITL0 { + ext.args = params.module_args + } + withName: REGENIE_RUNL0 { + ext.args = params.module_args + } + withName: REGENIE_RUNL1 { + ext.args = params.module_args + } +} From 5048e824ece0f7cf13980aee474648c0fcb8369b Mon Sep 17 00:00:00 2001 From: lyh970817 Date: Thu, 28 May 2026 18:15:17 +0800 Subject: [PATCH 08/13] Update REGENIE runl1 metadata --- modules/nf-core/regenie/runl1/meta.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/modules/nf-core/regenie/runl1/meta.yml b/modules/nf-core/regenie/runl1/meta.yml index ca880ef22f56..012458cbf253 100644 --- a/modules/nf-core/regenie/runl1/meta.yml +++ b/modules/nf-core/regenie/runl1/meta.yml @@ -4,6 +4,7 @@ description: Finish REGENIE step 1 from split level-0 prediction files keywords: - regenie - gwas + - genome-wide association study - association - genomics - parallel @@ -159,9 +160,6 @@ authors: maintainers: - "@lyh970817" containers: - conda: - linux_amd64: - lock_file: "modules/nf-core/regenie/runl1/.conda-lock/linux_amd64-bd-5d361f9fcb2f85cf_1.txt" docker: linux_amd64: build_id: "bd-5d361f9fcb2f85cf_1" From 5d65a9475f3bc93e349aa936a61925786b38c2d6 Mon Sep 17 00:00:00 2001 From: lyh970817 Date: Thu, 28 May 2026 18:15:17 +0800 Subject: [PATCH 09/13] Use shared test params for REGENIE runl1 --- .../nf-core/regenie/runl1/tests/main.nf.test | 170 ++++++++++++++++-- .../regenie/runl1/tests/main.nf.test.snap | 56 +++++- .../regenie/runl1/tests/nextflow.config | 16 +- 3 files changed, 221 insertions(+), 21 deletions(-) diff --git a/modules/nf-core/regenie/runl1/tests/main.nf.test b/modules/nf-core/regenie/runl1/tests/main.nf.test index 12a4a947cfbb..355fdd9820ed 100644 --- a/modules/nf-core/regenie/runl1/tests/main.nf.test +++ b/modules/nf-core/regenie/runl1/tests/main.nf.test @@ -1,9 +1,9 @@ nextflow_process { name "Test Process REGENIE_RUNL1" - config "./nextflow.config" script "../main.nf" process "REGENIE_RUNL1" + config "./nextflow.config" tag "modules" tag "modules_nfcore" @@ -63,12 +63,7 @@ nextflow_process { .combine(REGENIE_SPLITL0.out.snplists) .flatMap { master_meta, master, snplist_meta, snplists -> (1..2).collect { job -> - [ - master_meta, - master, - snplists.find { snplist -> snplist.getFileName().toString().contains('_job' + job + '.snplist') }, - job - ] + [master_meta, master, snplists, job] } } @@ -98,13 +93,64 @@ nextflow_process { """ } } + + run("REGENIE_SPLITL0", alias: "REGENIE_SPLITL0_PLINK2") { + script "../../splitl0/main.nf" + process { + """ + input[0] = [ + [ id:'plink_simulated_plink2' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.pgen', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.pvar', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.psam', checkIfExists: true) + ] + + input[1] = [ + [ id:'plink_simulated_plink2' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true) + ] + + input[2] = [[:], []] + input[3] = [] + input[4] = 2 + """ + } + } + + run("REGENIE_RUNL0", alias: "REGENIE_RUNL0_PLINK2") { + script "../../runl0/main.nf" + process { + """ + input[0] = [ + [ id:'plink_simulated_plink2' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.pgen', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.pvar', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.psam', checkIfExists: true) + ] + + input[1] = REGENIE_SPLITL0_PLINK2.out.master + .combine(REGENIE_SPLITL0_PLINK2.out.snplists) + .map { master_meta, master, snplist_meta, snplists -> + [master_meta, master, snplists, 1] + } + + input[2] = [ + [ id:'plink_simulated_plink2' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true) + ] + + input[3] = [[:], []] + input[4] = [] + """ + } + } } test("homo_sapiens popgen - quantitative plink1 with covariates") { when { params { - module_args = '--phenoColList QuantitativeTrait' + module_args = "--phenoColList QuantitativeTrait" } process { """ @@ -147,6 +193,21 @@ nextflow_process { { assert process.out.loco.get(0).get(0).id == 'plink_simulated' }, { assert process.out.log.get(0).get(0).id == 'plink_simulated' }, { assert path(process.out.log.get(0).get(1)).exists() }, + { + def workDir = path(process.out.predictions.get(0).get(1)).parent + def command = workDir.resolve('.command.sh').text + def stagedNames = workDir.toFile().listFiles().collect { it.name } + + assert command.contains('--run-l1 plink_simulated.master') + assert command.contains('--bed plink_simulated') + assert command.contains('--covarFile plink_simulated_covariates.txt') + assert command.contains('--bsize 100') + assert !command.contains('--pgen') + assert stagedNames.contains('plink_simulated.bed') + assert stagedNames.contains('plink_simulated.bim') + assert stagedNames.contains('plink_simulated.fam') + assert stagedNames.contains('plink_simulated.master') + }, { def predList = path(process.out.predictions.get(0).get(1)) def locoFile = path(process.out.loco.get(0).get(1)) @@ -165,9 +226,99 @@ nextflow_process { def stableLoco = process.out.loco.collect { loco -> [loco[0], path(loco[1]).getFileName().toString()] } + def stableLogs = process.out.log.collect { log -> + [log[0], path(log[1]).getFileName().toString()] + } assert snapshot( stablePredictions, stableLoco, + stableLogs, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() + } + ) + } + + } + + test("homo_sapiens popgen - quantitative plink2 without covariates") { + + when { + params { + module_args = "--phenoColList QuantitativeTrait" + module_prefix = "plink_simulated_plink2" + } + process { + """ + input[0] = [ + [ id:'plink_simulated_plink2' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.pgen', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.pvar', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.psam', checkIfExists: true) + ] + + input[1] = REGENIE_SPLITL0_PLINK2.out.master + .combine(REGENIE_SPLITL0_PLINK2.out.snplists) + .combine(REGENIE_RUNL0_PLINK2.out.l0_predictions.map { it[1] }.collect()) + .map { values -> + [ values[0], values[1], values[3], values.drop(4).flatten() ] + } + + input[2] = [ + [ id:'plink_simulated' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true) + ] + + input[3] = [ + [ id:'plink_simulated' ], + [] + ] + + input[4] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.predictions.size() == 1 }, + { assert process.out.loco.size() == 1 }, + { assert process.out.log.size() == 1 }, + { assert process.out.predictions.get(0).get(0).id == 'plink_simulated_plink2' }, + { assert process.out.loco.get(0).get(0).id == 'plink_simulated_plink2' }, + { assert process.out.log.get(0).get(0).id == 'plink_simulated_plink2' }, + { + def workDir = path(process.out.predictions.get(0).get(1)).parent + def command = workDir.resolve('.command.sh').text + def stagedNames = workDir.toFile().listFiles().collect { it.name } + + assert command.contains('--run-l1 plink_simulated_plink2.master') + assert command.contains('--pgen plink_simulated') + assert command.contains('--bsize 1000') + assert command.contains('--out plink_simulated_plink2') + assert !command.contains('--bed') + assert !command.contains('--covarFile') + assert command.contains('--phenoColList QuantitativeTrait') + assert stagedNames.contains('plink_simulated.pgen') + assert stagedNames.contains('plink_simulated.psam') + assert stagedNames.contains('plink_simulated.pvar') + assert stagedNames.contains('plink_simulated_plink2.master') + }, + { + def stablePredictions = process.out.predictions.collect { prediction -> + [prediction[0], path(prediction[1]).getFileName().toString()] + } + def stableLoco = process.out.loco.collect { loco -> + [loco[0], path(loco[1]).getFileName().toString()] + } + def stableLogs = process.out.log.collect { log -> + [log[0], path(log[1]).getFileName().toString()] + } + assert snapshot( + stablePredictions, + stableLoco, + stableLogs, process.out.findAll { key, val -> key.startsWith('versions') } ).match() } @@ -181,9 +332,6 @@ nextflow_process { options "-stub" when { - params { - module_args = '--phenoColList QuantitativeTrait' - } process { """ input[0] = [ diff --git a/modules/nf-core/regenie/runl1/tests/main.nf.test.snap b/modules/nf-core/regenie/runl1/tests/main.nf.test.snap index 96094031b4b5..a9b4a5dbb9a7 100644 --- a/modules/nf-core/regenie/runl1/tests/main.nf.test.snap +++ b/modules/nf-core/regenie/runl1/tests/main.nf.test.snap @@ -17,6 +17,14 @@ "plink_simulated_1.loco.gz" ] ], + [ + [ + { + "id": "plink_simulated" + }, + "plink_simulated.log" + ] + ], { "versions_regenie": [ [ @@ -31,7 +39,7 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-05-21T22:36:12.351327082" + "timestamp": "2026-05-27T19:57:18.939854903" }, "homo_sapiens popgen - plink1 - stub": { "content": [ @@ -73,6 +81,48 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-05-21T22:37:19.177658994" + "timestamp": "2026-05-27T18:59:13.646476959" + }, + "homo_sapiens popgen - quantitative plink2 without covariates": { + "content": [ + [ + [ + { + "id": "plink_simulated_plink2" + }, + "plink_simulated_plink2_pred.list" + ] + ], + [ + [ + { + "id": "plink_simulated_plink2" + }, + "plink_simulated_plink2_1.loco.gz" + ] + ], + [ + [ + { + "id": "plink_simulated_plink2" + }, + "plink_simulated_plink2.log" + ] + ], + { + "versions_regenie": [ + [ + "REGENIE_RUNL1", + "regenie", + "4.1.2" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-05-27T19:57:40.719017173" } -} \ No newline at end of file +} diff --git a/modules/nf-core/regenie/runl1/tests/nextflow.config b/modules/nf-core/regenie/runl1/tests/nextflow.config index f66bd1ce1c45..aae03b1f31e8 100644 --- a/modules/nf-core/regenie/runl1/tests/nextflow.config +++ b/modules/nf-core/regenie/runl1/tests/nextflow.config @@ -1,11 +1,13 @@ +params { + module_args = "" + module_prefix = null +} + process { - withName: REGENIE_SPLITL0 { - ext.args = params.module_args - } - withName: REGENIE_RUNL0 { - ext.args = params.module_args + withName: "REGENIE_SPLITL0|REGENIE_RUNL0|REGENIE_RUNL1|REGENIE_SPLITL0_PLINK2|REGENIE_RUNL0_PLINK2" { + ext.args = { params.module_args ?: "" } } - withName: REGENIE_RUNL1 { - ext.args = params.module_args + withName: "REGENIE_SPLITL0_PLINK2|REGENIE_RUNL0_PLINK2|REGENIE_RUNL1" { + ext.prefix = { params.module_prefix } } } From cc1379c438f0ab5f7ae337beacb1a87aad4fa196 Mon Sep 17 00:00:00 2001 From: lyh970817 Date: Mon, 8 Jun 2026 00:15:27 +0800 Subject: [PATCH 10/13] Scope REGENIE runl1 test prefix override --- modules/nf-core/regenie/runl1/tests/main.nf.test | 2 +- modules/nf-core/regenie/runl1/tests/main.nf.test.snap | 6 +++--- modules/nf-core/regenie/runl1/tests/nextflow.config | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/modules/nf-core/regenie/runl1/tests/main.nf.test b/modules/nf-core/regenie/runl1/tests/main.nf.test index 355fdd9820ed..87fe49e68b55 100644 --- a/modules/nf-core/regenie/runl1/tests/main.nf.test +++ b/modules/nf-core/regenie/runl1/tests/main.nf.test @@ -296,7 +296,7 @@ nextflow_process { assert command.contains('--run-l1 plink_simulated_plink2.master') assert command.contains('--pgen plink_simulated') assert command.contains('--bsize 1000') - assert command.contains('--out plink_simulated_plink2') + assert command.contains('--out plink_simulated') assert !command.contains('--bed') assert !command.contains('--covarFile') assert command.contains('--phenoColList QuantitativeTrait') diff --git a/modules/nf-core/regenie/runl1/tests/main.nf.test.snap b/modules/nf-core/regenie/runl1/tests/main.nf.test.snap index a9b4a5dbb9a7..a8aa3f1cea9e 100644 --- a/modules/nf-core/regenie/runl1/tests/main.nf.test.snap +++ b/modules/nf-core/regenie/runl1/tests/main.nf.test.snap @@ -90,7 +90,7 @@ { "id": "plink_simulated_plink2" }, - "plink_simulated_plink2_pred.list" + "plink_simulated_pred.list" ] ], [ @@ -98,7 +98,7 @@ { "id": "plink_simulated_plink2" }, - "plink_simulated_plink2_1.loco.gz" + "plink_simulated_1.loco.gz" ] ], [ @@ -106,7 +106,7 @@ { "id": "plink_simulated_plink2" }, - "plink_simulated_plink2.log" + "plink_simulated.log" ] ], { diff --git a/modules/nf-core/regenie/runl1/tests/nextflow.config b/modules/nf-core/regenie/runl1/tests/nextflow.config index aae03b1f31e8..a8db9c91da7e 100644 --- a/modules/nf-core/regenie/runl1/tests/nextflow.config +++ b/modules/nf-core/regenie/runl1/tests/nextflow.config @@ -7,7 +7,7 @@ process { withName: "REGENIE_SPLITL0|REGENIE_RUNL0|REGENIE_RUNL1|REGENIE_SPLITL0_PLINK2|REGENIE_RUNL0_PLINK2" { ext.args = { params.module_args ?: "" } } - withName: "REGENIE_SPLITL0_PLINK2|REGENIE_RUNL0_PLINK2|REGENIE_RUNL1" { + withName: "REGENIE_SPLITL0_PLINK2|REGENIE_RUNL0_PLINK2" { ext.prefix = { params.module_prefix } } } From 654208ae432b42dadca98aa690c288bf10004749 Mon Sep 17 00:00:00 2001 From: lyh970817 Date: Mon, 8 Jun 2026 01:07:22 +0800 Subject: [PATCH 11/13] Align REGENIE runl1 style with runl0 --- modules/nf-core/regenie/runl1/main.nf | 5 +- .../nf-core/regenie/runl1/tests/main.nf.test | 70 ++++----- .../regenie/runl1/tests/main.nf.test.snap | 148 +++++++++--------- 3 files changed, 103 insertions(+), 120 deletions(-) diff --git a/modules/nf-core/regenie/runl1/main.nf b/modules/nf-core/regenie/runl1/main.nf index beba8d8deb7f..c1d6a997be9f 100644 --- a/modules/nf-core/regenie/runl1/main.nf +++ b/modules/nf-core/regenie/runl1/main.nf @@ -45,13 +45,10 @@ process REGENIE_RUNL1 { """ stub: - def args = task.ext.args ?: '' def input_prefix = plink_genotype_file.baseName def prefix = task.ext.prefix ?: input_prefix - def pheno_match = args =~ /--phenoColList\s+(\S+)/ - def pheno_name = pheno_match.find() ? pheno_match.group(1) : 'Y1' """ - echo "${pheno_name} ${prefix}_1.loco.gz" > ${prefix}_pred.list + echo "Y1 ${prefix}_1.loco.gz" > ${prefix}_pred.list echo "" | gzip > ${prefix}_1.loco.gz touch ${prefix}.log """ diff --git a/modules/nf-core/regenie/runl1/tests/main.nf.test b/modules/nf-core/regenie/runl1/tests/main.nf.test index 87fe49e68b55..253d14466544 100644 --- a/modules/nf-core/regenie/runl1/tests/main.nf.test +++ b/modules/nf-core/regenie/runl1/tests/main.nf.test @@ -99,14 +99,14 @@ nextflow_process { process { """ input[0] = [ - [ id:'plink_simulated_plink2' ], + [ id:'plink_simulated' ], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.pgen', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.pvar', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.psam', checkIfExists: true) ] input[1] = [ - [ id:'plink_simulated_plink2' ], + [ id:'plink_simulated' ], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true) ] @@ -122,7 +122,7 @@ nextflow_process { process { """ input[0] = [ - [ id:'plink_simulated_plink2' ], + [ id:'plink_simulated' ], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.pgen', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.pvar', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.psam', checkIfExists: true) @@ -135,7 +135,7 @@ nextflow_process { } input[2] = [ - [ id:'plink_simulated_plink2' ], + [ id:'plink_simulated' ], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true) ] @@ -194,19 +194,12 @@ nextflow_process { { assert process.out.log.get(0).get(0).id == 'plink_simulated' }, { assert path(process.out.log.get(0).get(1)).exists() }, { - def workDir = path(process.out.predictions.get(0).get(1)).parent - def command = workDir.resolve('.command.sh').text - def stagedNames = workDir.toFile().listFiles().collect { it.name } - + def command = path(process.out.predictions.get(0).get(1)).parent.resolve('.command.sh').text.replaceAll(/\s+/, ' ') assert command.contains('--run-l1 plink_simulated.master') assert command.contains('--bed plink_simulated') assert command.contains('--covarFile plink_simulated_covariates.txt') assert command.contains('--bsize 100') assert !command.contains('--pgen') - assert stagedNames.contains('plink_simulated.bed') - assert stagedNames.contains('plink_simulated.bim') - assert stagedNames.contains('plink_simulated.fam') - assert stagedNames.contains('plink_simulated.master') }, { def predList = path(process.out.predictions.get(0).get(1)) @@ -229,12 +222,12 @@ nextflow_process { def stableLogs = process.out.log.collect { log -> [log[0], path(log[1]).getFileName().toString()] } - assert snapshot( - stablePredictions, - stableLoco, - stableLogs, - process.out.findAll { key, val -> key.startsWith('versions') } - ).match() + assert snapshot([ + predictions: stablePredictions, + loco: stableLoco, + log: stableLogs, + versions_regenie: process.out.versions_regenie + ]).match() } ) } @@ -251,7 +244,7 @@ nextflow_process { process { """ input[0] = [ - [ id:'plink_simulated_plink2' ], + [ id:'plink_simulated' ], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.pgen', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.pvar', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.psam', checkIfExists: true) @@ -285,14 +278,11 @@ nextflow_process { { assert process.out.predictions.size() == 1 }, { assert process.out.loco.size() == 1 }, { assert process.out.log.size() == 1 }, - { assert process.out.predictions.get(0).get(0).id == 'plink_simulated_plink2' }, - { assert process.out.loco.get(0).get(0).id == 'plink_simulated_plink2' }, - { assert process.out.log.get(0).get(0).id == 'plink_simulated_plink2' }, + { assert process.out.predictions.get(0).get(0).id == 'plink_simulated' }, + { assert process.out.loco.get(0).get(0).id == 'plink_simulated' }, + { assert process.out.log.get(0).get(0).id == 'plink_simulated' }, { - def workDir = path(process.out.predictions.get(0).get(1)).parent - def command = workDir.resolve('.command.sh').text - def stagedNames = workDir.toFile().listFiles().collect { it.name } - + def command = path(process.out.predictions.get(0).get(1)).parent.resolve('.command.sh').text.replaceAll(/\s+/, ' ') assert command.contains('--run-l1 plink_simulated_plink2.master') assert command.contains('--pgen plink_simulated') assert command.contains('--bsize 1000') @@ -300,10 +290,6 @@ nextflow_process { assert !command.contains('--bed') assert !command.contains('--covarFile') assert command.contains('--phenoColList QuantitativeTrait') - assert stagedNames.contains('plink_simulated.pgen') - assert stagedNames.contains('plink_simulated.psam') - assert stagedNames.contains('plink_simulated.pvar') - assert stagedNames.contains('plink_simulated_plink2.master') }, { def stablePredictions = process.out.predictions.collect { prediction -> @@ -315,12 +301,12 @@ nextflow_process { def stableLogs = process.out.log.collect { log -> [log[0], path(log[1]).getFileName().toString()] } - assert snapshot( - stablePredictions, - stableLoco, - stableLogs, - process.out.findAll { key, val -> key.startsWith('versions') } - ).match() + assert snapshot([ + predictions: stablePredictions, + loco: stableLoco, + log: stableLogs, + versions_regenie: process.out.versions_regenie + ]).match() } ) } @@ -376,12 +362,12 @@ nextflow_process { def stableLogs = process.out.log.collect { log -> [log[0], path(log[1]).getFileName().toString()] } - assert snapshot( - stablePredictions, - stableLoco, - stableLogs, - process.out.findAll { key, val -> key.startsWith('versions') } - ).match() + assert snapshot([ + predictions: stablePredictions, + loco: stableLoco, + log: stableLogs, + versions_regenie: process.out.versions_regenie + ]).match() } ) } diff --git a/modules/nf-core/regenie/runl1/tests/main.nf.test.snap b/modules/nf-core/regenie/runl1/tests/main.nf.test.snap index a8aa3f1cea9e..28de6e2d4aac 100644 --- a/modules/nf-core/regenie/runl1/tests/main.nf.test.snap +++ b/modules/nf-core/regenie/runl1/tests/main.nf.test.snap @@ -1,31 +1,31 @@ { "homo_sapiens popgen - quantitative plink1 with covariates": { "content": [ - [ - [ - { - "id": "plink_simulated" - }, - "plink_simulated_pred.list" - ] - ], - [ - [ - { - "id": "plink_simulated" - }, - "plink_simulated_1.loco.gz" - ] - ], - [ - [ - { - "id": "plink_simulated" - }, - "plink_simulated.log" - ] - ], { + "predictions": [ + [ + { + "id": "plink_simulated" + }, + "plink_simulated_pred.list" + ] + ], + "loco": [ + [ + { + "id": "plink_simulated" + }, + "plink_simulated_1.loco.gz" + ] + ], + "log": [ + [ + { + "id": "plink_simulated" + }, + "plink_simulated.log" + ] + ], "versions_regenie": [ [ "REGENIE_RUNL1", @@ -39,35 +39,35 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-05-27T19:57:18.939854903" + "timestamp": "2026-06-08T00:58:56.144945786" }, "homo_sapiens popgen - plink1 - stub": { "content": [ - [ - [ - { - "id": "plink_simulated" - }, - "plink_simulated_pred.list" - ] - ], - [ - [ - { - "id": "plink_simulated" - }, - "plink_simulated_1.loco.gz" - ] - ], - [ - [ - { - "id": "plink_simulated" - }, - "plink_simulated.log" - ] - ], { + "predictions": [ + [ + { + "id": "plink_simulated" + }, + "plink_simulated_pred.list" + ] + ], + "loco": [ + [ + { + "id": "plink_simulated" + }, + "plink_simulated_1.loco.gz" + ] + ], + "log": [ + [ + { + "id": "plink_simulated" + }, + "plink_simulated.log" + ] + ], "versions_regenie": [ [ "REGENIE_RUNL1", @@ -85,31 +85,31 @@ }, "homo_sapiens popgen - quantitative plink2 without covariates": { "content": [ - [ - [ - { - "id": "plink_simulated_plink2" - }, - "plink_simulated_pred.list" - ] - ], - [ - [ - { - "id": "plink_simulated_plink2" - }, - "plink_simulated_1.loco.gz" - ] - ], - [ - [ - { - "id": "plink_simulated_plink2" - }, - "plink_simulated.log" - ] - ], { + "predictions": [ + [ + { + "id": "plink_simulated" + }, + "plink_simulated_pred.list" + ] + ], + "loco": [ + [ + { + "id": "plink_simulated" + }, + "plink_simulated_1.loco.gz" + ] + ], + "log": [ + [ + { + "id": "plink_simulated" + }, + "plink_simulated.log" + ] + ], "versions_regenie": [ [ "REGENIE_RUNL1", @@ -123,6 +123,6 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-05-27T19:57:40.719017173" + "timestamp": "2026-06-08T00:59:23.869025925" } } From 17bc8528b11d675fd232cf48cdd5052730e8aeac Mon Sep 17 00:00:00 2001 From: lyh970817 Date: Mon, 8 Jun 2026 17:41:47 +0800 Subject: [PATCH 12/13] Support apptainer container engine in REGENIE runl1 --- modules/nf-core/regenie/runl1/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/nf-core/regenie/runl1/main.nf b/modules/nf-core/regenie/runl1/main.nf index c1d6a997be9f..88391a5aaba7 100644 --- a/modules/nf-core/regenie/runl1/main.nf +++ b/modules/nf-core/regenie/runl1/main.nf @@ -3,7 +3,7 @@ process REGENIE_RUNL1 { label 'process_medium' conda "${moduleDir}/environment.yml" - container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + container "${workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/7a/7a05bf71ea09adc5ebf9f0c656c9b326c0f16ba8e4966914972e58313469a466/data' : 'community.wave.seqera.io/library/regenie:4.1.2--5d361f9fcb2f85cf'}" From a638fb7cd13d06bf21d7ee537da195fc1b9464e8 Mon Sep 17 00:00:00 2001 From: lyh970817 Date: Mon, 8 Jun 2026 18:25:30 +0800 Subject: [PATCH 13/13] Remove REGENIE runl1 test prefix override --- modules/nf-core/regenie/runl1/tests/main.nf.test | 3 +-- modules/nf-core/regenie/runl1/tests/nextflow.config | 4 ---- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/modules/nf-core/regenie/runl1/tests/main.nf.test b/modules/nf-core/regenie/runl1/tests/main.nf.test index 253d14466544..e561eadd7cca 100644 --- a/modules/nf-core/regenie/runl1/tests/main.nf.test +++ b/modules/nf-core/regenie/runl1/tests/main.nf.test @@ -239,7 +239,6 @@ nextflow_process { when { params { module_args = "--phenoColList QuantitativeTrait" - module_prefix = "plink_simulated_plink2" } process { """ @@ -283,7 +282,7 @@ nextflow_process { { assert process.out.log.get(0).get(0).id == 'plink_simulated' }, { def command = path(process.out.predictions.get(0).get(1)).parent.resolve('.command.sh').text.replaceAll(/\s+/, ' ') - assert command.contains('--run-l1 plink_simulated_plink2.master') + assert command.contains('--run-l1 plink_simulated.master') assert command.contains('--pgen plink_simulated') assert command.contains('--bsize 1000') assert command.contains('--out plink_simulated') diff --git a/modules/nf-core/regenie/runl1/tests/nextflow.config b/modules/nf-core/regenie/runl1/tests/nextflow.config index a8db9c91da7e..fe3704faf1d9 100644 --- a/modules/nf-core/regenie/runl1/tests/nextflow.config +++ b/modules/nf-core/regenie/runl1/tests/nextflow.config @@ -1,13 +1,9 @@ params { module_args = "" - module_prefix = null } process { withName: "REGENIE_SPLITL0|REGENIE_RUNL0|REGENIE_RUNL1|REGENIE_SPLITL0_PLINK2|REGENIE_RUNL0_PLINK2" { ext.args = { params.module_args ?: "" } } - withName: "REGENIE_SPLITL0_PLINK2|REGENIE_RUNL0_PLINK2" { - ext.prefix = { params.module_prefix } - } }