biodatageeks
diff --git a/Cargo.lock b/Cargo.lock
Lines changed: 76 additions & 20 deletions
diff --git a/benchmark-infra/terraform/main.tf b/benchmark-infra/terraform/main.tf
Lines changed: 2 additions & 2 deletions
diff --git a/datafusion/bio-function-vep/Cargo.toml b/datafusion/bio-function-vep/Cargo.toml
Lines changed: 1 addition & 1 deletion
diff --git a/datafusion/bio-function-vep/examples/annotate_vep_golden_bench.rs b/datafusion/bio-function-vep/examples/annotate_vep_golden_bench.rs
Lines changed: 59 additions & 0 deletions
@@ -184,8 +184,8 @@ resource "ansible_playbook" "provision" {
 # ── Outputs ──────────────────────────────────────────────────────────
 
 output "project_id" {
-  description = "GCP project ID (protected from destroy)"
-  value       = google_project.benchmark.project_id
+  description = "GCP project ID (survives destroy)"
+  value       = var.project_id
 }
 
 output "instance_ip" {
 
@@ -29,7 +29,7 @@ parquet = { version = "56", features = ["arrow"] }
 
 [dev-dependencies]
 tokio = { workspace = true, features = ["rt-multi-thread", "macros"] }
-datafusion-bio-format-vcf = { git = "https://github.com/biodatageeks/datafusion-bio-formats.git", rev = "45562f3beb230c23008b19bfe6c172bd1c5923fa" }
+datafusion-bio-format-vcf = { git = "https://github.com/biodatageeks/datafusion-bio-formats.git", rev = "e92ff6fa169d611d67e280551cd3e1a037254093" }
 noodles-vcf = { git = "https://github.com/biodatageeks/noodles.git", rev = "9b7b2c5b6531373918302d4c07410e583f1b5b5c" }
 env_logger = "0.11"
 tempfile = "3"
 
@@ -403,6 +403,65 @@ async fn main() -> Result<()> {
 /// Scans context_dir for files matching known patterns and builds the
 /// JSON string to pass as the 4th argument to `annotate_vep()`.
 fn build_options_json(args: &Args) -> Result<Option<String>> {
+    // Detect partitioned per-chromosome cache layout.
+    let is_partitioned = Path::new(&args.cache_source).join("variation").is_dir();
+    if is_partitioned {
+        // Partitioned path: AnnotateProvider::scan() handles everything.
+        let mut entries = Vec::new();
+        entries.push("\"partitioned\":true".to_string());
+        entries.push(format!("\"extended_probes\":{}", args.extended_probes));
+
+        if args.everything {
+            entries.push("\"everything\":true".to_string());
+            let fasta_path = args.reference_fasta_path.as_ref().ok_or_else(|| {
+                DataFusionError::Execution(
+                    "--everything requires --reference-fasta-path=/path/to/reference.fa[.gz]"
+                        .to_string(),
+                )
+            })?;
+            entries.push(format!(
+                "\"reference_fasta_path\":\"{}\"",
+                sql_literal(fasta_path.to_str().ok_or_else(|| {
+                    DataFusionError::Execution(
+                        "reference_fasta_path must be valid UTF-8".to_string(),
+                    )
+                })?)
+            ));
+        } else {
+            if args.hgvs {
+                entries.push("\"hgvs\":true".to_string());
+                if let Some(shift_hgvs) = args.shift_hgvs {
+                    entries.push(format!("\"shift_hgvs\":{shift_hgvs}"));
+                }
+                let fasta_path = args.reference_fasta_path.as_ref().ok_or_else(|| {
+                    DataFusionError::Execution(
+                        "--hgvs requires --reference-fasta-path=/path/to/reference.fa[.gz]"
+                            .to_string(),
+                    )
+                })?;
+                entries.push(format!(
+                    "\"reference_fasta_path\":\"{}\"",
+                    sql_literal(fasta_path.to_str().ok_or_else(|| {
+                        DataFusionError::Execution(
+                            "reference_fasta_path must be valid UTF-8".to_string(),
+                        )
+                    })?)
+                ));
+            }
+            entries.push("\"check_existing\":true".to_string());
+            entries.push("\"af\":true".to_string());
+            entries.push("\"af_1kg\":true".to_string());
+            entries.push("\"af_gnomade\":true".to_string());
+            entries.push("\"af_gnomadg\":true".to_string());
+            entries.push("\"max_af\":true".to_string());
+            entries.push("\"pubmed\":true".to_string());
+        }
+        if args.merged {
+            entries.push("\"merged\":true".to_string());
+        }
+        return Ok(Some(format!("{{{}}}", entries.join(","))));
+    }
+
     // Use explicit context_dir, or derive from cache_source parent directory.
     let context_dir = args
         .context_dir
Original file line number	Diff line number	Diff line change
`@@ -184,8 +184,8 @@ resource "ansible_playbook" "provision" {`
`184`	`184`	`# ── Outputs ──────────────────────────────────────────────────────────`
`185`	`185`
`186`	`186`	`output "project_id" {`
`187`		`- description = "GCP project ID (protected from destroy)"`
`188`		`- value = google_project.benchmark.project_id`
	`187`	`+ description = "GCP project ID (survives destroy)"`
	`188`	`+ value = var.project_id`
`189`	`189`	`}`
`190`	`190`
`191`	`191`	`output "instance_ip" {`