nf-core
diff --git a/‎modules/nf-core/gcta/bivariateremlldms/main.nf‎
Lines changed: 9 additions & 6 deletions b/‎modules/nf-core/gcta/bivariateremlldms/main.nf‎
Lines changed: 9 additions & 6 deletions
diff --git a/‎modules/nf-core/gcta/bivariateremlldms/meta.yml‎
Lines changed: 23 additions & 6 deletions b/‎modules/nf-core/gcta/bivariateremlldms/meta.yml‎
Lines changed: 23 additions & 6 deletions
diff --git a/‎modules/nf-core/gcta/bivariateremlldms/tests/main.nf.test‎
Lines changed: 81 additions & 16 deletions b/‎modules/nf-core/gcta/bivariateremlldms/tests/main.nf.test‎
Lines changed: 81 additions & 16 deletions
diff --git a/‎modules/nf-core/gcta/bivariateremlldms/tests/main.nf.test.snap‎
Lines changed: 27 additions & 1 deletion b/‎modules/nf-core/gcta/bivariateremlldms/tests/main.nf.test.snap‎
Lines changed: 27 additions & 1 deletion
diff --git a/‎modules/nf-core/gcta/makegrmpart/main.nf‎
Lines changed: 15 additions & 15 deletions b/‎modules/nf-core/gcta/makegrmpart/main.nf‎
Lines changed: 15 additions & 15 deletions
@@ -2,20 +2,20 @@ process GCTA_BIVARIATEREMLLDMS {
     tag "bivariate_reml_ldms_${meta.id}_${meta2.id}"
     label 'process_medium'
     conda "${moduleDir}/environment.yml"
-    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'docker://community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' :
-        'community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' }"
+    container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
+        ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/46/46b0d05f0daa47561d87d2a9cac5e51edc2c78e26f1bbab439c688386241a274/data'
+        : 'community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9'}"
 
     input:
-    tuple val(meta), path(phenotype_file)
+    tuple val(meta), path(phenotype_file), val(mpheno), val(prevalence)
     tuple val(meta2), path(mgrm_file), path(grm_files)
     tuple val(meta3), path(quant_covariates_file)
     tuple val(meta4), path(cat_covariates_file)
 
     output:
     tuple val(meta), path("*.hsq"), emit: bivariate_results
     tuple val(meta), path("*.log"), emit: log_file
-    tuple val("${task.process}"), val("gcta"), eval("gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//'"), emit: versions_gcta, topic: versions
+    tuple val("${task.process}"), val("gcta"), eval("gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'"), emit: versions_gcta, topic: versions
 
     when:
     task.ext.when == null || task.ext.when
@@ -26,13 +26,16 @@ process GCTA_BIVARIATEREMLLDMS {
     def qcovar_param = quant_covariates_file ? "--qcovar ${quant_covariates_file}" : ''
     def covar_param = cat_covariates_file ? "--covar ${cat_covariates_file}" : ''
     def extra_args = task.ext.args ?: ''
+    def bivar_traits = mpheno ?: '1 2'
+    def prevalence_param = prevalence ? "--reml-bivar-prevalence ${prevalence}" : ''
 
     """
 
     gcta \\
-        --reml-bivar 1 2 \\
+        --reml-bivar ${bivar_traits} \\
         --mgrm ${mgrm_file} \\
         --pheno "${phenotype_file}" \\
+        ${prevalence_param} \\
         ${qcovar_param} \\
         ${covar_param} \\
         --reml-bivar-no-constrain \\
 
@@ -3,16 +3,23 @@ name: "gcta_bivariateremlldms"
 description: Run bivariate REML-LDMS analysis with an MGRM manifest
 keywords:
   - gcta
+  - genome-wide complex trait analysis
   - reml
+  - restricted maximum likelihood
   - bivariate
   - ldms
+  - linkage disequilibrium and minor allele frequency stratification
+  - grm
+  - genetic relationship matrix
   - genetics
 tools:
   - "gcta":
       description: "Genome-wide Complex Trait Analysis (GCTA) estimates genetic relationships, variance components, and association statistics from genome-wide data."
       homepage: "https://yanglab.westlake.edu.cn/software/gcta/"
       documentation: "https://yanglab.westlake.edu.cn/software/gcta/static/gcta_doc_latest.pdf"
       tool_dev_url: "https://yanglab.westlake.edu.cn/software/gcta/"
+      licence: ["GPL-3.0-only"]
+      identifier: "biotools:gcta"
 input:
   - - meta:
         type: map
@@ -25,6 +32,16 @@ input:
         pattern: "*.{phe,pheno,txt,tsv}"
         ontologies:
           - edam: "http://edamontology.org/format_3475"
+    - mpheno:
+        type: string
+        description: |
+          Optional space-separated pair of phenotype column indices (e.g. `2 3`), given as one
+          string and passed to `--reml-bivar`; pass `[]` to use the module default of `1 2`
+    - prevalence:
+        type: string
+        description: |
+          Optional space-separated pair of disease prevalences (e.g. `0.30 0.25`), given as one
+          string and passed to `--reml-bivar-prevalence` for binary traits; pass `[]` for quantitative traits
   - - meta2:
         type: map
         description: |
@@ -38,8 +55,8 @@ input:
           - edam: "http://edamontology.org/format_2330"
     - grm_files:
         type: file
-        description: GRM sidecar files referenced by `mgrm_file`
-        pattern: "*"
+        description: GRM bundles referenced by `mgrm_file`
+        pattern: "*.grm.*"
         ontologies: []
   - - meta3:
         type: map
@@ -95,7 +112,7 @@ output:
       - "gcta":
           type: string
           description: The tool name
-      - "gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//'":
+      - "gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'":
           type: eval
           description: The command used to retrieve the GCTA version
 topics:
@@ -106,10 +123,10 @@ topics:
       - gcta:
           type: string
           description: The tool name
-      - gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//':
+      - "gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'":
           type: eval
           description: The command used to retrieve the GCTA version
 authors:
-  - "@andongni"
+  - "@lyh970817"
 maintainers:
-  - "@andongni"
+  - "@lyh970817"
@@ -28,6 +28,20 @@ nextflow_process {
             }
         }
 
+        run("GAWK", alias: "GAWK_BIVARIATE_BINARY_PHENO") { // Dependency run: builds the two-trait phenotype file (meta id 'BinaryTrait1__BinaryTrait2') consumed by the binary/prevalence test
+            script "../../../gawk/main.nf"
+            process { // The awk program below skips the first line, keeps columns 1-2, and emits (NR % 2) + 1 and (NR % 3 == 0 ? 2 : 1) as two trait columns taking values 1 or 2
+                """
+                input[0] = [
+                    [ id:'BinaryTrait1__BinaryTrait2' ],
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true)
+                ]
+                input[1] = Channel.of('FNR == 1 { next } { print \$1, \$2, (NR % 2) + 1, (NR % 3 == 0 ? 2 : 1) }').collectFile(name:'bivariate_binary_phenotype.awk')
+                input[2] = false
+                """
+            }
+        }
+
         run("GAWK", alias: "GAWK_QUANTITATIVE_COVARIATES") {
             script "../../../gawk/main.nf"
             process {
@@ -73,14 +87,16 @@ nextflow_process {
             }
         }
 
-        run("GCTA_MAKEGRMPART", alias: "GCTA_MAKEGRM_LDMS1") {
+        run("GCTA_MAKEGRMPART", alias: "GCTA_MAKEGRMPART_LDMS1") {
             script "../../makegrmpart/main.nf"
             process {
                 """
                 file('plink_simulated.mbfile').text = 'plink_simulated\\n'
 
                 input[0] = [
-                    [ id:'plink_simulated_ldms1', part_gcta_job:1, nparts_gcta:1 ],
+                    [ id:'plink_simulated_ldms1' ],
+                    1,
+                    1,
                     file('plink_simulated.mbfile'),
                     [
                         file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true)
@@ -100,14 +116,16 @@ nextflow_process {
             }
         }
 
-        run("GCTA_MAKEGRMPART", alias: "GCTA_MAKEGRM_LDMS2") {
+        run("GCTA_MAKEGRMPART", alias: "GCTA_MAKEGRMPART_LDMS2") {
             script "../../makegrmpart/main.nf"
             process {
                 """
                 file('plink_simulated.mbfile').text = 'plink_simulated\\n'
 
                 input[0] = [
-                    [ id:'plink_simulated_ldms2', part_gcta_job:1, nparts_gcta:1 ],
+                    [ id:'plink_simulated_ldms2' ],
+                    1,
+                    1,
                     file('plink_simulated.mbfile'),
                     [
                         file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true)
@@ -134,13 +152,13 @@ nextflow_process {
                     .of('plink_simulated_ldms1.part_1_1\\nplink_simulated_ldms2.part_1_1')
                     .collectFile(name:'plink_simulated_ldms.mgrm', newLine: true)
 
-                ldms_grm_files = GCTA_MAKEGRM_LDMS1.out.grm_files
-                    .mix(GCTA_MAKEGRM_LDMS2.out.grm_files)
-                    .map { meta, grm_id, grm_bin, grm_n_bin -> [grm_id, grm_bin, grm_n_bin] }
+                ldms_grm_files = GCTA_MAKEGRMPART_LDMS1.out.grm_files
+                    .mix(GCTA_MAKEGRMPART_LDMS2.out.grm_files)
+                    .map { meta, grm_files, nparts_gcta, part_gcta_job -> grm_files }
                     .collect()
                     .map { rows -> rows.flatten() }
 
-                input[0] = GAWK_BIVARIATE_PHENO.out.output
+                input[0] = GAWK_BIVARIATE_PHENO.out.output.map { meta, phenotype_file -> [meta, phenotype_file, [], []] }
                 input[1] = mgrm_file
                     .combine(ldms_grm_files)
                     .map { row -> [[ id:'plink_simulated_ldms' ], row[0], row[1..-1]] }
@@ -187,6 +205,53 @@ nextflow_process {
         }
     }
 
+    test("homo_sapiens popgen - binary bivariate phenotype with ldms mgrm and prevalence") { // Exercises the optional mpheno/prevalence inputs: input[0] carries '1 2' and '0.30 0.25' alongside the phenotype file
+        config "./nextflow.config"
+        when { // Inputs: binary phenotype tuple, MGRM manifest plus both LDMS GRM file sets, and empty covariate slots
+            process {
+                """
+                mgrm_file = Channel
+                    .of('plink_simulated_ldms1.part_1_1\\nplink_simulated_ldms2.part_1_1')
+                    .collectFile(name:'plink_simulated_ldms.mgrm', newLine: true)
+
+                ldms_grm_files = GCTA_MAKEGRMPART_LDMS1.out.grm_files
+                    .mix(GCTA_MAKEGRMPART_LDMS2.out.grm_files)
+                    .map { meta, grm_files, nparts_gcta, part_gcta_job -> grm_files }
+                    .collect()
+                    .map { rows -> rows.flatten() }
+
+                input[0] = GAWK_BIVARIATE_BINARY_PHENO.out.output.map { meta, phenotype_file -> [meta, phenotype_file, '1 2', '0.30 0.25'] }
+                input[1] = mgrm_file
+                    .combine(ldms_grm_files)
+                    .map { row -> [[ id:'plink_simulated_ldms' ], row[0], row[1..-1]] }
+                input[2] = [[ id:'covariates_quant' ], []]
+                input[3] = [[ id:'covariates_cat' ], []]
+                """
+            }
+        }
+
+        then { // Checks success, one result per output channel, output naming, the prevalence flag in the log, and the snapshot
+            assertAll(
+                { assert process.success },
+                { assert process.out.bivariate_results.size() == 1 },
+                { assert process.out.log_file.size() == 1 },
+                { assert process.out.bivariate_results.get(0).get(0).id == "BinaryTrait1__BinaryTrait2" }, // meta is the phenotype tuple's meta (input[0]), not the 'plink_simulated_ldms' meta2
+                { assert file(process.out.bivariate_results.get(0).get(1)).name == "BinaryTrait1__BinaryTrait2.hsq" },
+                { assert file(process.out.log_file.get(0).get(1)).name == "BinaryTrait1__BinaryTrait2.log" },
+                {
+                    def logText = file(process.out.log_file.get(0).get(1)).text
+                    assert logText.contains("--reml-bivar-prevalence 0.3 0.25") // NOTE(review): input is '0.30 0.25' but '0.3' is matched; presumably GCTA echoes the parsed number in its log - confirm
+                },
+                {
+                    assert snapshot( // Only the .hsq results and the versions* channels are snapshotted; the log is checked by name and content above
+                        process.out.bivariate_results,
+                        process.out.findAll { key, val -> key.startsWith('versions') }
+                    ).match()
+                }
+            )
+        }
+    }
+
     test("homo_sapiens popgen - bivariate phenotype fails when mgrm references missing GRM basename") {
         config "./nextflow.config"
         when {
@@ -196,13 +261,13 @@ nextflow_process {
                     .of('plink_simulated_ldms_missing.part_1_1')
                     .collectFile(name:'plink_simulated_ldms_broken.mgrm', newLine: true)
 
-                ldms_grm_files = GCTA_MAKEGRM_LDMS1.out.grm_files
-                    .mix(GCTA_MAKEGRM_LDMS2.out.grm_files)
-                    .map { meta, grm_id, grm_bin, grm_n_bin -> [grm_id, grm_bin, grm_n_bin] }
+                ldms_grm_files = GCTA_MAKEGRMPART_LDMS1.out.grm_files
+                    .mix(GCTA_MAKEGRMPART_LDMS2.out.grm_files)
+                    .map { meta, grm_files, nparts_gcta, part_gcta_job -> grm_files }
                     .collect()
                     .map { rows -> rows.flatten() }
 
-                input[0] = GAWK_BIVARIATE_PHENO.out.output
+                input[0] = GAWK_BIVARIATE_PHENO.out.output.map { meta, phenotype_file -> [meta, phenotype_file, [], []] }
                 input[1] = broken_mgrm_file
                     .combine(ldms_grm_files)
                     .map { row -> [[ id:'plink_simulated_ldms' ], row[0], row[1..-1]] }
@@ -231,13 +296,13 @@ nextflow_process {
                     .of('plink_simulated_ldms1.part_1_1\\nplink_simulated_ldms2.part_1_1')
                     .collectFile(name:'plink_simulated_ldms.mgrm', newLine: true)
 
-                ldms_grm_files = GCTA_MAKEGRM_LDMS1.out.grm_files
-                    .mix(GCTA_MAKEGRM_LDMS2.out.grm_files)
-                    .map { meta, grm_id, grm_bin, grm_n_bin -> [grm_id, grm_bin, grm_n_bin] }
+                ldms_grm_files = GCTA_MAKEGRMPART_LDMS1.out.grm_files
+                    .mix(GCTA_MAKEGRMPART_LDMS2.out.grm_files)
+                    .map { meta, grm_files, nparts_gcta, part_gcta_job -> grm_files }
                     .collect()
                     .map { rows -> rows.flatten() }
 
-                input[0] = GAWK_BIVARIATE_PHENO.out.output
+                input[0] = GAWK_BIVARIATE_PHENO.out.output.map { meta, phenotype_file -> [meta, phenotype_file, [], []] }
                 input[1] = mgrm_file
                     .combine(ldms_grm_files)
                     .map { row -> [[ id:'plink_simulated_ldms' ], row[0], row[1..-1]] }
 
@@ -177,5 +177,31 @@
             "nextflow": "25.10.4"
         },
         "timestamp": "2026-03-21T00:39:38.748257872"
+    },
+    "homo_sapiens popgen - binary bivariate phenotype with ldms mgrm and prevalence": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "BinaryTrait1__BinaryTrait2"
+                    },
+                    "BinaryTrait1__BinaryTrait2.hsq:md5,56f5b427deec963764e25a9acad76b80"
+                ]
+            ],
+            {
+                "versions_gcta": [
+                    [
+                        "GCTA_BIVARIATEREMLLDMS",
+                        "gcta",
+                        "1.94.1"
+                    ]
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.3",
+            "nextflow": "25.10.4"
+        },
+        "timestamp": "2026-05-13T16:07:21.380446323"
     }
-}
+}
@@ -1,25 +1,25 @@
 process GCTA_MAKEGRMPART {
-    tag "part ${meta.part_gcta_job} of ${meta.nparts_gcta} (${meta.id})"
+    tag "${meta.id}: part ${part_gcta_job} of ${nparts_gcta}"
     label 'process_medium'
     conda "${moduleDir}/environment.yml"
-    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'docker://community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' :
-        'community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' }"
+    container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
+        ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/46/46b0d05f0daa47561d87d2a9cac5e51edc2c78e26f1bbab439c688386241a274/data'
+        : 'community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9'}"
 
     input:
-    tuple val(meta), path(mfile), path(bed_pgen), path(bim_pvar), path(fam_psam)
+    tuple val(meta), val(nparts_gcta), val(part_gcta_job), path(mfile), path(bed_pgen), path(bim_pvar), path(fam_psam)
     tuple val(meta2), path(snp_group_file)
 
     output:
-    tuple val(meta), path("*.part_${meta.nparts_gcta}_${meta.part_gcta_job}.grm.id"), path("*.part_${meta.nparts_gcta}_${meta.part_gcta_job}.grm.bin"), path("*.part_${meta.nparts_gcta}_${meta.part_gcta_job}.grm.N.bin"), emit: grm_files
-    tuple val("${task.process}"), val("gcta"), eval("gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//'"), emit: versions_gcta, topic: versions
+    tuple val(meta), path("*.part_${nparts}_${part}.grm.*"), val(nparts_gcta), val(part_gcta_job), emit: grm_files
+    tuple val("${task.process}"), val("gcta"), eval("gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'"), emit: versions_gcta, topic: versions
 
     when:
     task.ext.when == null || task.ext.when
 
     script:
-    def part_gcta_job = meta.part_gcta_job
-    def nparts_gcta = meta.nparts_gcta
+    nparts = nparts_gcta ?: 1
+    part = part_gcta_job ?: 1
     def extract_cmd = snp_group_file ? "--extract ${snp_group_file}" : ''
     def extra_args = task.ext.args ?: ''
     def prefix = task.ext.prefix ?: "${meta.id}"
@@ -28,21 +28,21 @@ process GCTA_MAKEGRMPART {
     def multi_file_flag = genotype_extension == 'pgen' ? '--mpfile' : '--mbfile'
 
     """
-
     gcta \\
         ${multi_file_flag} ${mfile} \\
-        --make-grm-part ${nparts_gcta} ${part_gcta_job} \\
+        --make-grm-part ${nparts} ${part} \\
         ${extract_cmd} \\
-        --maf 0.01 \\
         --thread-num ${task.cpus} \\
         --out ${prefix} ${extra_args}
     """
 
     stub:
+    nparts = nparts_gcta ?: 1
+    part = part_gcta_job ?: 1
     def prefix = task.ext.prefix ?: "${meta.id}"
     """
-    touch ${prefix}.part_${meta.nparts_gcta}_${meta.part_gcta_job}.grm.id
-    touch ${prefix}.part_${meta.nparts_gcta}_${meta.part_gcta_job}.grm.bin
-    touch ${prefix}.part_${meta.nparts_gcta}_${meta.part_gcta_job}.grm.N.bin
+    touch ${prefix}.part_${nparts}_${part}.grm.id
+    touch ${prefix}.part_${nparts}_${part}.grm.bin
+    touch ${prefix}.part_${nparts}_${part}.grm.N.bin
     """
 }