From 8d82104292815fa2478abf190e16dac928136312 Mon Sep 17 00:00:00 2001 From: lyh970817 Date: Sun, 15 Mar 2026 16:51:20 +0800 Subject: [PATCH 1/5] Add HDL shared test fixtures --- .../popgen/hdl/reference/chr1.1_toy.bim | 2 ++ .../popgen/hdl/reference/chr1.1_toy.rda | Bin 0 -> 127 bytes .../popgen/hdl/reference/chr1.2_toy.bim | 2 ++ .../popgen/hdl/reference/chr1.2_toy.rda | Bin 0 -> 127 bytes .../popgen/hdl/reference/toy_snp_counter.RData | Bin 0 -> 127 bytes .../popgen/hdl/reference/toy_snp_list.RData | Bin 0 -> 106 bytes .../popgen/hdl/sumstats/trait1_canonical.tsv | 5 +++++ .../popgen/hdl/sumstats/trait2_canonical.tsv | 5 +++++ 8 files changed, 14 insertions(+) create mode 100644 data/genomics/homo_sapiens/popgen/hdl/reference/chr1.1_toy.bim create mode 100644 data/genomics/homo_sapiens/popgen/hdl/reference/chr1.1_toy.rda create mode 100644 data/genomics/homo_sapiens/popgen/hdl/reference/chr1.2_toy.bim create mode 100644 data/genomics/homo_sapiens/popgen/hdl/reference/chr1.2_toy.rda create mode 100644 data/genomics/homo_sapiens/popgen/hdl/reference/toy_snp_counter.RData create mode 100644 data/genomics/homo_sapiens/popgen/hdl/reference/toy_snp_list.RData create mode 100644 data/genomics/homo_sapiens/popgen/hdl/sumstats/trait1_canonical.tsv create mode 100644 data/genomics/homo_sapiens/popgen/hdl/sumstats/trait2_canonical.tsv diff --git a/data/genomics/homo_sapiens/popgen/hdl/reference/chr1.1_toy.bim b/data/genomics/homo_sapiens/popgen/hdl/reference/chr1.1_toy.bim new file mode 100644 index 000000000..8535c8640 --- /dev/null +++ b/data/genomics/homo_sapiens/popgen/hdl/reference/chr1.1_toy.bim @@ -0,0 +1,2 @@ +1 rs1 0 101 A G +1 rs2 0 102 C T diff --git a/data/genomics/homo_sapiens/popgen/hdl/reference/chr1.1_toy.rda b/data/genomics/homo_sapiens/popgen/hdl/reference/chr1.1_toy.rda new file mode 100644 index 0000000000000000000000000000000000000000..219f1b159d1b6eaf8ce52c28b0f959bd271e92d8 GIT binary patch literal 127 zcmb2|=3oE=X6~X+gJ)e2k`fXU(h?F=lM>QcjU*$So$r+BN=QV^XpnJZ6JzsJIQ!^v z&>Ws6KO1J8{P);LU`FbHU0$9GSB##kJUcYC&XSoa{cXH-GvlnFV+ISPCOC=x=I4oE ebdtH+H^pay>>dT{2_NhQ*ce!3Zv43pv=;zd$S+&~ literal 0 HcmV?d00001 diff --git a/data/genomics/homo_sapiens/popgen/hdl/reference/chr1.2_toy.bim b/data/genomics/homo_sapiens/popgen/hdl/reference/chr1.2_toy.bim new file mode 100644 index 000000000..c001831a0 --- /dev/null +++ b/data/genomics/homo_sapiens/popgen/hdl/reference/chr1.2_toy.bim @@ -0,0 +1,2 @@ +1 rs3 0 201 A C +1 rs4 0 202 G T diff --git a/data/genomics/homo_sapiens/popgen/hdl/reference/chr1.2_toy.rda b/data/genomics/homo_sapiens/popgen/hdl/reference/chr1.2_toy.rda new file mode 100644 index 0000000000000000000000000000000000000000..1098081833392e124ee82249fe8c39082ad06d3e GIT binary patch literal 127 zcmb2|=3oE=X6~X+gJ)e2k`fXU(h?F=lM>QcjU*$So$r+BN=QV^XpnJZ6JzsJIQ!^v z&>Ws6KO1J8{I}Uq;zs>kPNvNlt{6R6d3I=OT_OYXZ}W7A2RgHYju|YFn&2e%o1Z6w e(MjfN-xQw-vU?P)Cw#CMU}MPflkB|?v=;z?`7f>j literal 0 HcmV?d00001 diff --git a/data/genomics/homo_sapiens/popgen/hdl/reference/toy_snp_counter.RData b/data/genomics/homo_sapiens/popgen/hdl/reference/toy_snp_counter.RData new file mode 100644 index 0000000000000000000000000000000000000000..2d98523271e72a7976e25388d39b33d47794f9f9 GIT binary patch literal 127 zcmb2|=3oE=X6~X+gJ)e2k`fXU(h?F=lM>QcjU*$So$r+BN=QV^XpnJZ6JzsJ(47)K zCp=)y^vRJii>A+v4v3mG>(GG%Dm-Ba4;)}W;={)yz_9qDPr_O6GrqcPMl2IpBzYT; e*qcm^;n>}=x%s6%U$W#sMuyi5ADw&*v=;!Xl`cyF literal 0 HcmV?d00001 diff --git a/data/genomics/homo_sapiens/popgen/hdl/reference/toy_snp_list.RData b/data/genomics/homo_sapiens/popgen/hdl/reference/toy_snp_list.RData new file mode 100644 index 0000000000000000000000000000000000000000..9324b958ce5788dc847d15f7668b096bb6eef4bf GIT binary patch literal 106 zcmb2|=3oE=X6~X+gJ)e2k`fXU(h?F=lM>QcjU*$So$r+BN=QV^XpnJZ6JzsJ&<&gu z97}g{K literal 0 HcmV?d00001 diff --git a/data/genomics/homo_sapiens/popgen/hdl/sumstats/trait1_canonical.tsv b/data/genomics/homo_sapiens/popgen/hdl/sumstats/trait1_canonical.tsv new file mode 100644 index 000000000..97581a58f --- /dev/null +++ b/data/genomics/homo_sapiens/popgen/hdl/sumstats/trait1_canonical.tsv @@ -0,0 +1,5 @@ +SNP A1 A2 CHR POS RSID EffectAllele OtherAllele N Z +rs1 A G 1 101 rs1 A G 10000 0.5 +rs2 C T 1 202 rs2 C T 10000 -0.2 +rs3 A C 1 303 rs3 A C 10000 0.4 +rs4 G T 1 404 rs4 G T 10000 -0.1 diff --git a/data/genomics/homo_sapiens/popgen/hdl/sumstats/trait2_canonical.tsv b/data/genomics/homo_sapiens/popgen/hdl/sumstats/trait2_canonical.tsv new file mode 100644 index 000000000..aaea638ef --- /dev/null +++ b/data/genomics/homo_sapiens/popgen/hdl/sumstats/trait2_canonical.tsv @@ -0,0 +1,5 @@ +SNP A1 A2 CHR POS RSID EffectAllele OtherAllele N Z +rs1 A G 1 101 rs1 A G 12000 0.3 +rs2 C T 1 202 rs2 C T 12000 -0.4 +rs3 A C 1 303 rs3 A C 12000 0.2 +rs4 G T 1 404 rs4 G T 12000 -0.2 From 17bca4dd516d6656d3a1f5679227a4ace84dbaa7 Mon Sep 17 00:00:00 2001 From: lyh970817 Date: Sun, 15 Mar 2026 23:20:31 +0800 Subject: [PATCH 2/5] Document HDL fixtures in root README --- README.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/README.md b/README.md index 7928e5eaf..e9090eb82 100644 --- a/README.md +++ b/README.md @@ -692,6 +692,14 @@ The earth sciences folder contain subfolders for different data formats encounte - 1000GP.chr*.chunks.txt: chunks of the chromosome obtain with GLIMPSE_chunk - AFR.gwas: Study locus file. From [SuShiE](https://github.com/mancusolab/sushie). - AFR.ld: LD matrix file. From [SuShiE](https://github.com/mancusolab/sushie). + - hdl/reference/chr1.1_toy.bim: toy HDL LD reference BIM sidecar for chunk 1.1 + - hdl/reference/chr1.1_toy.rda: toy HDL LD reference payload for chunk 1.1 + - hdl/reference/chr1.2_toy.bim: toy HDL LD reference BIM sidecar for chunk 1.2 + - hdl/reference/chr1.2_toy.rda: toy HDL LD reference payload for chunk 1.2 + - hdl/reference/toy_snp_counter.RData: toy HDL SNP count metadata + - hdl/reference/toy_snp_list.RData: toy HDL SNP list metadata + - hdl/sumstats/trait1_canonical.tsv: canonical HDL summary statistics for trait 1 + - hdl/sumstats/trait2_canonical.tsv: canonical HDL summary statistics for trait 2 - svsig: - NA03697B2_new.pbmm2.repeats.svsig.gz: structural variant file for NA03697B2_new.pbmm2.repeats.bam, created with PBSV discover version (2.9.0 default settings) From 51846da12537574ba55bd5cb8819b93f19700337 Mon Sep 17 00:00:00 2001 From: lyh970817 Date: Mon, 16 Mar 2026 15:57:10 +0800 Subject: [PATCH 3/5] Document HDL fixtures and align toy sumstats --- README.md | 16 +-- .../homo_sapiens/popgen/hdl/README.md | 41 +++++++ .../popgen/hdl/generate_toy_hdl_data.R | 110 ++++++++++++++++++ .../popgen/hdl/sumstats/trait1_canonical.tsv | 6 +- .../popgen/hdl/sumstats/trait2_canonical.tsv | 6 +- 5 files changed, 165 insertions(+), 14 deletions(-) create mode 100644 data/genomics/homo_sapiens/popgen/hdl/README.md create mode 100644 data/genomics/homo_sapiens/popgen/hdl/generate_toy_hdl_data.R diff --git a/README.md b/README.md index e9090eb82..3d0ad2e18 100644 --- a/README.md +++ b/README.md @@ -692,14 +692,14 @@ The earth sciences folder contain subfolders for different data formats encounte - 1000GP.chr*.chunks.txt: chunks of the chromosome obtain with GLIMPSE_chunk - AFR.gwas: Study locus file. From [SuShiE](https://github.com/mancusolab/sushie). - AFR.ld: LD matrix file. From [SuShiE](https://github.com/mancusolab/sushie). - - hdl/reference/chr1.1_toy.bim: toy HDL LD reference BIM sidecar for chunk 1.1 - - hdl/reference/chr1.1_toy.rda: toy HDL LD reference payload for chunk 1.1 - - hdl/reference/chr1.2_toy.bim: toy HDL LD reference BIM sidecar for chunk 1.2 - - hdl/reference/chr1.2_toy.rda: toy HDL LD reference payload for chunk 1.2 - - hdl/reference/toy_snp_counter.RData: toy HDL SNP count metadata - - hdl/reference/toy_snp_list.RData: toy HDL SNP list metadata - - hdl/sumstats/trait1_canonical.tsv: canonical HDL summary statistics for trait 1 - - hdl/sumstats/trait2_canonical.tsv: canonical HDL summary statistics for trait 2 + - hdl/reference/chr1.1_toy.bim: Toy HDL LD reference BIM sidecar for chunk 1.1. From [HDL](https://github.com/zhenin/HDL). + - hdl/reference/chr1.1_toy.rda: Toy HDL LD reference payload for chunk 1.1. From [HDL](https://github.com/zhenin/HDL). + - hdl/reference/chr1.2_toy.bim: Toy HDL LD reference BIM sidecar for chunk 1.2. From [HDL](https://github.com/zhenin/HDL). + - hdl/reference/chr1.2_toy.rda: Toy HDL LD reference payload for chunk 1.2. From [HDL](https://github.com/zhenin/HDL). + - hdl/reference/toy_snp_counter.RData: Toy HDL SNP count metadata. From [HDL](https://github.com/zhenin/HDL). + - hdl/reference/toy_snp_list.RData: Toy HDL SNP list metadata. From [HDL](https://github.com/zhenin/HDL). + - hdl/sumstats/trait1_canonical.tsv: Canonical HDL summary statistics for trait 1. From [HDL](https://github.com/zhenin/HDL). + - hdl/sumstats/trait2_canonical.tsv: Canonical HDL summary statistics for trait 2. From [HDL](https://github.com/zhenin/HDL). - svsig: - NA03697B2_new.pbmm2.repeats.svsig.gz: structural variant file for NA03697B2_new.pbmm2.repeats.bam, created with PBSV discover version (2.9.0 default settings) diff --git a/data/genomics/homo_sapiens/popgen/hdl/README.md b/data/genomics/homo_sapiens/popgen/hdl/README.md new file mode 100644 index 000000000..d45098048 --- /dev/null +++ b/data/genomics/homo_sapiens/popgen/hdl/README.md @@ -0,0 +1,41 @@ +# HDL Toy Test Dataset + +These files are synthetic toy fixtures for HDL module testing in the companion +`nf-core/modules` work for `nf-core/modules#10912`. They are intended to exercise +[HDL](https://github.com/zhenin/HDL) inputs in tests, not to provide a +scientific LD reference panel or redistributed upstream reference bundle. + +## Layout + +- `reference/`: toy HDL LD reference chunks and metadata sidecars +- `sumstats/`: canonical toy summary-statistics tables aligned to the toy SNPs + +## Regeneration + +From this directory: + +```bash +Rscript generate_toy_hdl_data.R +``` + +From the root of the `nf-core/test-datasets` worktree: + +```bash +Rscript data/genomics/homo_sapiens/popgen/hdl/generate_toy_hdl_data.R +``` + +## R Objects + +The `.bim` sidecars, both canonical `sumstats/*.tsv` files, and the R binary +payloads are all generated locally by `generate_toy_hdl_data.R` from fully +synthetic constants in this directory. + +- `reference/chr1.1_toy.rda` and `reference/chr1.2_toy.rda` each contain + synthetic `LDsc`, `lam`, and `V` objects for one toy HDL chunk. +- `reference/toy_snp_counter.RData` contains `nsnps.list` and + `nsnps.list.imputed`, each as a named one-element list with the toy chunk SNP + counts. +- `reference/toy_snp_list.RData` contains `snps.list.imputed.vector`, the four + synthetic SNP IDs shared by the toy fixtures. +- `sumstats/trait1_canonical.tsv` and `sumstats/trait2_canonical.tsv` are tiny + canonical summary-statistics tables keyed to those synthetic SNP IDs. diff --git a/data/genomics/homo_sapiens/popgen/hdl/generate_toy_hdl_data.R b/data/genomics/homo_sapiens/popgen/hdl/generate_toy_hdl_data.R new file mode 100644 index 000000000..00be2a31d --- /dev/null +++ b/data/genomics/homo_sapiens/popgen/hdl/generate_toy_hdl_data.R @@ -0,0 +1,110 @@ +#!/usr/bin/env Rscript + +args <- commandArgs(trailingOnly = FALSE) +file_arg <- "--file=" +script_path <- sub(file_arg, "", args[grep(file_arg, args)]) + +if (length(script_path) != 1 || script_path == "") { + stop("Unable to determine the script path from commandArgs().") +} + +script_dir <- dirname(normalizePath(script_path)) +reference_dir <- file.path(script_dir, "reference") +sumstats_dir <- file.path(script_dir, "sumstats") + +dir.create(reference_dir, recursive = TRUE, showWarnings = FALSE) +dir.create(sumstats_dir, recursive = TRUE, showWarnings = FALSE) + +writeLines( + c( + "1 rs1 0 101 A G", + "1 rs2 0 102 C T" + ), + file.path(reference_dir, "chr1.1_toy.bim") +) + +writeLines( + c( + "1 rs3 0 201 A C", + "1 rs4 0 202 G T" + ), + file.path(reference_dir, "chr1.2_toy.bim") +) + +lam <- c(1.3, 0.85) +LDsc <- c(1.1, 1.4) +V <- diag(2) +save( + LDsc, + lam, + V, + file = file.path(reference_dir, "chr1.1_toy.rda"), + compress = "gzip" +) + +lam <- c(1.25, 0.9) +LDsc <- c(1.2, 1.35) +V <- diag(2) +save( + LDsc, + lam, + V, + file = file.path(reference_dir, "chr1.2_toy.rda"), + compress = "gzip" +) + +nsnps.list <- list("1" = c(2, 2)) +nsnps.list.imputed <- list("1" = c(2, 2)) +save( + nsnps.list.imputed, + nsnps.list, + file = file.path(reference_dir, "toy_snp_counter.RData"), + compress = "gzip" +) + +snps.list.imputed.vector <- c("rs1", "rs2", "rs3", "rs4") +save( + snps.list.imputed.vector, + file = file.path(reference_dir, "toy_snp_list.RData"), + compress = "gzip" +) + +trait1 <- data.frame( + SNP = c("rs1", "rs2", "rs3", "rs4"), + A1 = c("A", "C", "A", "G"), + A2 = c("G", "T", "C", "T"), + CHR = c(1, 1, 1, 1), + POS = c(101, 102, 201, 202), + RSID = c("rs1", "rs2", "rs3", "rs4"), + EffectAllele = c("A", "C", "A", "G"), + OtherAllele = c("G", "T", "C", "T"), + N = c(10000, 10000, 10000, 10000), + Z = c(0.5, -0.2, 0.4, -0.1) +) +write.table( + trait1, + file.path(sumstats_dir, "trait1_canonical.tsv"), + sep = "\t", + quote = FALSE, + row.names = FALSE +) + +trait2 <- data.frame( + SNP = c("rs1", "rs2", "rs3", "rs4"), + A1 = c("A", "C", "A", "G"), + A2 = c("G", "T", "C", "T"), + CHR = c(1, 1, 1, 1), + POS = c(101, 102, 201, 202), + RSID = c("rs1", "rs2", "rs3", "rs4"), + EffectAllele = c("A", "C", "A", "G"), + OtherAllele = c("G", "T", "C", "T"), + N = c(12000, 12000, 12000, 12000), + Z = c(0.3, -0.4, 0.2, -0.2) +) +write.table( + trait2, + file.path(sumstats_dir, "trait2_canonical.tsv"), + sep = "\t", + quote = FALSE, + row.names = FALSE +) diff --git a/data/genomics/homo_sapiens/popgen/hdl/sumstats/trait1_canonical.tsv b/data/genomics/homo_sapiens/popgen/hdl/sumstats/trait1_canonical.tsv index 97581a58f..2ac8d27cd 100644 --- a/data/genomics/homo_sapiens/popgen/hdl/sumstats/trait1_canonical.tsv +++ b/data/genomics/homo_sapiens/popgen/hdl/sumstats/trait1_canonical.tsv @@ -1,5 +1,5 @@ SNP A1 A2 CHR POS RSID EffectAllele OtherAllele N Z rs1 A G 1 101 rs1 A G 10000 0.5 -rs2 C T 1 202 rs2 C T 10000 -0.2 -rs3 A C 1 303 rs3 A C 10000 0.4 -rs4 G T 1 404 rs4 G T 10000 -0.1 +rs2 C T 1 102 rs2 C T 10000 -0.2 +rs3 A C 1 201 rs3 A C 10000 0.4 +rs4 G T 1 202 rs4 G T 10000 -0.1 diff --git a/data/genomics/homo_sapiens/popgen/hdl/sumstats/trait2_canonical.tsv b/data/genomics/homo_sapiens/popgen/hdl/sumstats/trait2_canonical.tsv index aaea638ef..6d84622e4 100644 --- a/data/genomics/homo_sapiens/popgen/hdl/sumstats/trait2_canonical.tsv +++ b/data/genomics/homo_sapiens/popgen/hdl/sumstats/trait2_canonical.tsv @@ -1,5 +1,5 @@ SNP A1 A2 CHR POS RSID EffectAllele OtherAllele N Z rs1 A G 1 101 rs1 A G 12000 0.3 -rs2 C T 1 202 rs2 C T 12000 -0.4 -rs3 A C 1 303 rs3 A C 12000 0.2 -rs4 G T 1 404 rs4 G T 12000 -0.2 +rs2 C T 1 102 rs2 C T 12000 -0.4 +rs3 A C 1 201 rs3 A C 12000 0.2 +rs4 G T 1 202 rs4 G T 12000 -0.2 From 21357a92d50c36b44e9689d987d7eacefa2241e6 Mon Sep 17 00:00:00 2001 From: lyh970817 Date: Wed, 18 Mar 2026 18:11:20 +0800 Subject: [PATCH 4/5] docs: clarify synthetic HDL fixture provenance --- README.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 3d0ad2e18..80b784bb2 100644 --- a/README.md +++ b/README.md @@ -692,14 +692,14 @@ The earth sciences folder contain subfolders for different data formats encounte - 1000GP.chr*.chunks.txt: chunks of the chromosome obtain with GLIMPSE_chunk - AFR.gwas: Study locus file. From [SuShiE](https://github.com/mancusolab/sushie). - AFR.ld: LD matrix file. From [SuShiE](https://github.com/mancusolab/sushie). - - hdl/reference/chr1.1_toy.bim: Toy HDL LD reference BIM sidecar for chunk 1.1. From [HDL](https://github.com/zhenin/HDL). - - hdl/reference/chr1.1_toy.rda: Toy HDL LD reference payload for chunk 1.1. From [HDL](https://github.com/zhenin/HDL). - - hdl/reference/chr1.2_toy.bim: Toy HDL LD reference BIM sidecar for chunk 1.2. From [HDL](https://github.com/zhenin/HDL). - - hdl/reference/chr1.2_toy.rda: Toy HDL LD reference payload for chunk 1.2. From [HDL](https://github.com/zhenin/HDL). - - hdl/reference/toy_snp_counter.RData: Toy HDL SNP count metadata. From [HDL](https://github.com/zhenin/HDL). - - hdl/reference/toy_snp_list.RData: Toy HDL SNP list metadata. From [HDL](https://github.com/zhenin/HDL). - - hdl/sumstats/trait1_canonical.tsv: Canonical HDL summary statistics for trait 1. From [HDL](https://github.com/zhenin/HDL). - - hdl/sumstats/trait2_canonical.tsv: Canonical HDL summary statistics for trait 2. From [HDL](https://github.com/zhenin/HDL). + - hdl/reference/chr1.1_toy.bim: Synthetic toy HDL-format BIM sidecar for chunk 1.1, generated by `generate_toy_hdl_data.R` for HDL-compatible inputs. + - hdl/reference/chr1.1_toy.rda: Synthetic toy HDL-format LD reference payload for chunk 1.1, generated by `generate_toy_hdl_data.R` for HDL-compatible inputs. + - hdl/reference/chr1.2_toy.bim: Synthetic toy HDL-format BIM sidecar for chunk 1.2, generated by `generate_toy_hdl_data.R` for HDL-compatible inputs. + - hdl/reference/chr1.2_toy.rda: Synthetic toy HDL-format LD reference payload for chunk 1.2, generated by `generate_toy_hdl_data.R` for HDL-compatible inputs. + - hdl/reference/toy_snp_counter.RData: Synthetic toy HDL-format SNP count metadata, generated by `generate_toy_hdl_data.R` for HDL-compatible inputs. + - hdl/reference/toy_snp_list.RData: Synthetic toy HDL-format SNP list metadata, generated by `generate_toy_hdl_data.R` for HDL-compatible inputs. + - hdl/sumstats/trait1_canonical.tsv: Synthetic toy HDL-format canonical summary statistics for trait 1, generated by `generate_toy_hdl_data.R` for HDL-compatible inputs. + - hdl/sumstats/trait2_canonical.tsv: Synthetic toy HDL-format canonical summary statistics for trait 2, generated by `generate_toy_hdl_data.R` for HDL-compatible inputs. - svsig: - NA03697B2_new.pbmm2.repeats.svsig.gz: structural variant file for NA03697B2_new.pbmm2.repeats.bam, created with PBSV discover version (2.9.0 default settings) From 2f7f62acd6210f0fb9b6677e3952211144bfa71d Mon Sep 17 00:00:00 2001 From: lyh970817 Date: Sun, 22 Mar 2026 17:09:54 +0800 Subject: [PATCH 5/5] Move HDL toy sumstats into shared popgen path --- README.md | 4 +-- .../homo_sapiens/popgen/hdl/README.md | 6 ++-- .../popgen/hdl/generate_toy_hdl_data.R | 2 +- .../homo_sapiens/popgen/sumstats/README.md | 31 +++++++++++++++++++ .../{hdl => }/sumstats/trait1_canonical.tsv | 0 .../{hdl => }/sumstats/trait2_canonical.tsv | 0 6 files changed, 37 insertions(+), 6 deletions(-) create mode 100644 data/genomics/homo_sapiens/popgen/sumstats/README.md rename data/genomics/homo_sapiens/popgen/{hdl => }/sumstats/trait1_canonical.tsv (100%) rename data/genomics/homo_sapiens/popgen/{hdl => }/sumstats/trait2_canonical.tsv (100%) diff --git a/README.md b/README.md index 80b784bb2..be877c3d5 100644 --- a/README.md +++ b/README.md @@ -698,8 +698,8 @@ The earth sciences folder contain subfolders for different data formats encounte - hdl/reference/chr1.2_toy.rda: Synthetic toy HDL-format LD reference payload for chunk 1.2, generated by `generate_toy_hdl_data.R` for HDL-compatible inputs. - hdl/reference/toy_snp_counter.RData: Synthetic toy HDL-format SNP count metadata, generated by `generate_toy_hdl_data.R` for HDL-compatible inputs. - hdl/reference/toy_snp_list.RData: Synthetic toy HDL-format SNP list metadata, generated by `generate_toy_hdl_data.R` for HDL-compatible inputs. - - hdl/sumstats/trait1_canonical.tsv: Synthetic toy HDL-format canonical summary statistics for trait 1, generated by `generate_toy_hdl_data.R` for HDL-compatible inputs. - - hdl/sumstats/trait2_canonical.tsv: Synthetic toy HDL-format canonical summary statistics for trait 2, generated by `generate_toy_hdl_data.R` for HDL-compatible inputs. + - sumstats/trait1_canonical.tsv: Synthetic canonical toy summary statistics for trait 1, generated by `hdl/generate_toy_hdl_data.R` for small GWAS-style module inputs. + - sumstats/trait2_canonical.tsv: Synthetic canonical toy summary statistics for trait 2, generated by `hdl/generate_toy_hdl_data.R` for small GWAS-style module inputs. - svsig: - NA03697B2_new.pbmm2.repeats.svsig.gz: structural variant file for NA03697B2_new.pbmm2.repeats.bam, created with PBSV discover version (2.9.0 default settings) diff --git a/data/genomics/homo_sapiens/popgen/hdl/README.md b/data/genomics/homo_sapiens/popgen/hdl/README.md index d45098048..f82369fe7 100644 --- a/data/genomics/homo_sapiens/popgen/hdl/README.md +++ b/data/genomics/homo_sapiens/popgen/hdl/README.md @@ -8,7 +8,7 @@ scientific LD reference panel or redistributed upstream reference bundle. ## Layout - `reference/`: toy HDL LD reference chunks and metadata sidecars -- `sumstats/`: canonical toy summary-statistics tables aligned to the toy SNPs +- `../sumstats/`: canonical toy summary-statistics tables aligned to the toy SNPs ## Regeneration @@ -37,5 +37,5 @@ synthetic constants in this directory. counts. - `reference/toy_snp_list.RData` contains `snps.list.imputed.vector`, the four synthetic SNP IDs shared by the toy fixtures. -- `sumstats/trait1_canonical.tsv` and `sumstats/trait2_canonical.tsv` are tiny - canonical summary-statistics tables keyed to those synthetic SNP IDs. +- `../sumstats/trait1_canonical.tsv` and `../sumstats/trait2_canonical.tsv` are + tiny canonical summary-statistics tables keyed to those synthetic SNP IDs. diff --git a/data/genomics/homo_sapiens/popgen/hdl/generate_toy_hdl_data.R b/data/genomics/homo_sapiens/popgen/hdl/generate_toy_hdl_data.R index 00be2a31d..799271dfe 100644 --- a/data/genomics/homo_sapiens/popgen/hdl/generate_toy_hdl_data.R +++ b/data/genomics/homo_sapiens/popgen/hdl/generate_toy_hdl_data.R @@ -10,7 +10,7 @@ if (length(script_path) != 1 || script_path == "") { script_dir <- dirname(normalizePath(script_path)) reference_dir <- file.path(script_dir, "reference") -sumstats_dir <- file.path(script_dir, "sumstats") +sumstats_dir <- file.path(script_dir, "..", "sumstats") dir.create(reference_dir, recursive = TRUE, showWarnings = FALSE) dir.create(sumstats_dir, recursive = TRUE, showWarnings = FALSE) diff --git a/data/genomics/homo_sapiens/popgen/sumstats/README.md b/data/genomics/homo_sapiens/popgen/sumstats/README.md new file mode 100644 index 000000000..48aa5f55e --- /dev/null +++ b/data/genomics/homo_sapiens/popgen/sumstats/README.md @@ -0,0 +1,31 @@ +# Toy Population-Genetics Summary Statistics + +These files are tiny synthetic GWAS-style summary-statistics tables intended for +module testing. They are generated from fixed constants by the companion HDL +fixture generator at `../hdl/generate_toy_hdl_data.R`. + +## Layout + +- `trait1_canonical.tsv`: synthetic canonical summary statistics for trait 1 +- `trait2_canonical.tsv`: synthetic canonical summary statistics for trait 2 + +## Regeneration + +From the `hdl/` directory: + +```bash +Rscript generate_toy_hdl_data.R +``` + +From the root of the `nf-core/test-datasets` worktree: + +```bash +Rscript data/genomics/homo_sapiens/popgen/hdl/generate_toy_hdl_data.R +``` + +## Notes + +These tables are not HDL-specific at the file-format level. They are kept under +`popgen/sumstats/` so they can be reused by modules that consume small +GWAS-style tabular inputs, while the HDL reference panel assets remain grouped +under `popgen/hdl/reference/`. diff --git a/data/genomics/homo_sapiens/popgen/hdl/sumstats/trait1_canonical.tsv b/data/genomics/homo_sapiens/popgen/sumstats/trait1_canonical.tsv similarity index 100% rename from data/genomics/homo_sapiens/popgen/hdl/sumstats/trait1_canonical.tsv rename to data/genomics/homo_sapiens/popgen/sumstats/trait1_canonical.tsv diff --git a/data/genomics/homo_sapiens/popgen/hdl/sumstats/trait2_canonical.tsv b/data/genomics/homo_sapiens/popgen/sumstats/trait2_canonical.tsv similarity index 100% rename from data/genomics/homo_sapiens/popgen/hdl/sumstats/trait2_canonical.tsv rename to data/genomics/homo_sapiens/popgen/sumstats/trait2_canonical.tsv