fix: [AI-673] address PR #674 review comments (CodeRabbit + cubic)

anandgupta42 · claude · anandgupta42 · commit 04bdbc4b321b · 2026-04-09T22:11:44.000-07:00
Address review comments from CodeRabbit and cubic on PR #674.

Bug fixes:
- `resolveUpstream` now resolves seed.* and snapshot.* deps in addition to models and sources. Previously these were silently dropped, causing `dbt test` to fail for any model ref()ing a seed or snapshot.
- Test name truncation no longer cuts off the scenario suffix. Budget the suffix length first, then truncate the model-name portion, so scenario names like `_null_handling_2` are always preserved even for long model names.
- `findModel`/`getUniqueId` in helpers.ts now validate `resource_type === "model"` on the key lookup path, not just the name fallback. Prevents returning non-model nodes by unique_id.
- Division detection regex now strips string literals AND comments before matching, so `'2024/01/15'` no longer triggers a false-positive boundary scenario.

Documentation fixes:
- `incremental-testing.md`: fix Jinja syntax — `{{ if is_incremental() }}` is invalid; use `{% if is_incremental() %}` for control flow.
- `SKILL.md`: workflow header now shows all 5 phases (Analyze -> Generate -> Refine -> Validate -> Write).
- `SKILL.md`: add language label to fenced code block for markdownlint.
- `unit-test-yaml-spec.md`: show both top-level `tags` and nested `config.tags` forms explicitly.

Infrastructure:
- Add `seeds` and `snapshots` arrays to `DbtManifestResult` (previously only counts were returned). `parseManifest` now extracts full seed and snapshot info using the same shape as `DbtModelInfo`.
- Tests migrate to shared `tmpdir()` fixture from `test/fixture/fixture.ts` for automatic cleanup (per project coding guidelines).
- Wrap `DbtUnitTestGenTool` import/registration in `registry.ts` with `altimate_change` markers (upstream-shared file).

New tests (4):
- seed deps resolve via ref() not source()
- snapshot deps resolve via ref() not source()
- long model names preserve scenario suffix (no truncation collision)
- division in string literals does not trigger boundary scenario

Full suite: 2676 pass, 0 fail.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
diff --git a/.opencode/skills/dbt-unit-tests/SKILL.md b/.opencode/skills/dbt-unit-tests/SKILL.md
@@ -33,7 +33,7 @@ description: Generate dbt unit tests automatically for any model. Analyzes SQL l
 4. **Never weaken a test to make it pass.** If the test fails, the model logic may be wrong. Investigate before changing expected values.
 5. **Compile before committing.** Always run `altimate-dbt test --model <name>` to verify tests compile and execute.
 
-## Core Workflow: Analyze -> Generate -> Refine -> Validate
+## Core Workflow: Analyze -> Generate -> Refine -> Validate -> Write
 
 ### Phase 1: Analyze the Model
 
@@ -61,7 +61,7 @@ dbt_unit_test_gen(manifest_path: "target/manifest.json", model: "<name>")
 
 The `dbt_unit_test_gen` tool does the heavy lifting:
 
-```
+```text
 dbt_unit_test_gen(
   manifest_path: "target/manifest.json",
   model: "fct_orders",
diff --git a/.opencode/skills/dbt-unit-tests/references/incremental-testing.md b/.opencode/skills/dbt-unit-tests/references/incremental-testing.md
@@ -1,16 +1,16 @@
 # Testing Incremental dbt Models
 
-Incremental models have two code paths controlled by `{{ if is_incremental() }}`. Both paths must be tested.
+Incremental models have two code paths controlled by `{% if is_incremental() %}`. Both paths must be tested.
 
 ## The Two Paths
 
 ```sql
--- Full refresh path (is_incremental = false)
 SELECT * FROM {{ ref('stg_orders') }}
 
--- Incremental path (is_incremental = true)
-SELECT * FROM {{ ref('stg_orders') }}
-WHERE updated_at > (SELECT MAX(updated_at) FROM {{ this }})
+{% if is_incremental() %}
+  -- Incremental path: only process new rows
+  WHERE updated_at > (SELECT MAX(updated_at) FROM {{ this }})
+{% endif %}
 ```
 
 ## Test 1: Full Refresh
diff --git a/.opencode/skills/dbt-unit-tests/references/unit-test-yaml-spec.md b/.opencode/skills/dbt-unit-tests/references/unit-test-yaml-spec.md
@@ -131,9 +131,24 @@ Used with `overrides.macros.is_incremental: true` to mock the existing table sta
 
 ## Configuration
 
+Tags can be set at the top level (sibling of `config`) or nested under `config`:
+
 ```yaml
-config:
-  tags: ["unit-test", "revenue"]
+unit_tests:
+  - name: test_example
+    model: fct_orders
+    tags: ["unit-test", "revenue"]
+    # ... rest of test
+```
+
+Or via config:
+
+```yaml
+unit_tests:
+  - name: test_example
+    model: fct_orders
+    config:
+      tags: ["unit-test", "revenue"]
 ```
 
 ## Naming Conventions
diff --git a/packages/opencode/src/altimate/native/dbt/helpers.ts b/packages/opencode/src/altimate/native/dbt/helpers.ts
@@ -41,9 +41,10 @@ export function loadRawManifest(manifestPath: string): any | null {
 
 /**
  * Find a model node in the manifest by name or unique_id.
+ * Only returns nodes where resource_type === "model".
  */
 export function findModel(nodes: Record<string, any>, model: string): any | null {
-  if (model in nodes) return nodes[model]
+  if (model in nodes && nodes[model]?.resource_type === "model") return nodes[model]
   for (const [, node] of Object.entries(nodes)) {
     if (node.resource_type !== "model") continue
     if (node.name === model) return node
@@ -53,9 +54,10 @@ export function findModel(nodes: Record<string, any>, model: string): any | null
 
 /**
  * Get the unique_id for a model (by name or unique_id lookup).
+ * Only matches nodes where resource_type === "model".
  */
 export function getUniqueId(nodes: Record<string, any>, model: string): string | undefined {
-  if (model in nodes) return model
+  if (model in nodes && nodes[model]?.resource_type === "model") return model
   for (const [nodeId, node] of Object.entries(nodes)) {
     if (node.resource_type === "model" && node.name === model) return nodeId
   }
diff --git a/packages/opencode/src/altimate/native/dbt/manifest.ts b/packages/opencode/src/altimate/native/dbt/manifest.ts
@@ -32,6 +32,8 @@ export async function parseManifest(params: DbtManifestParams): Promise<DbtManif
     models: [],
     sources: [],
     tests: [],
+    seeds: [],
+    snapshots: [],
     source_count: 0,
     model_count: 0,
     test_count: 0,
@@ -70,37 +72,34 @@ export async function parseManifest(params: DbtManifestParams): Promise<DbtManif
 
   const models: DbtModelInfo[] = []
   const tests: DbtTestInfo[] = []
+  const seeds: DbtModelInfo[] = []
+  const snapshots: DbtModelInfo[] = []
   let testCount = 0
-  let snapshotCount = 0
-  let seedCount = 0
 
   for (const [nodeId, node] of Object.entries<any>(nodes)) {
     const resourceType = node.resource_type
 
-    if (resourceType === "model") {
-      const dependsOnNodes = node.depends_on?.nodes || []
-      const columns = extractColumns(node.columns || {})
-      models.push({
+    if (resourceType === "model" || resourceType === "seed" || resourceType === "snapshot") {
+      const info: DbtModelInfo = {
         unique_id: nodeId,
         name: node.name || "",
         description: node.description || undefined,
         schema_name: node.schema || undefined,
         database: node.database || undefined,
         materialized: node.config?.materialized || undefined,
-        depends_on: dependsOnNodes,
-        columns,
-      })
+        depends_on: node.depends_on?.nodes || [],
+        columns: extractColumns(node.columns || {}),
+      }
+      if (resourceType === "model") models.push(info)
+      else if (resourceType === "seed") seeds.push(info)
+      else snapshots.push(info)
     } else if (resourceType === "test") {
       testCount++
       tests.push({
         unique_id: nodeId,
         name: node.name || "",
         depends_on: node.depends_on?.nodes || [],
       })
-    } else if (resourceType === "snapshot") {
-      snapshotCount++
-    } else if (resourceType === "seed") {
-      seedCount++
     }
   }
 
@@ -122,11 +121,13 @@ export async function parseManifest(params: DbtManifestParams): Promise<DbtManif
     models,
     sources,
     tests,
+    seeds,
+    snapshots,
     source_count: sources.length,
     model_count: models.length,
     test_count: testCount,
-    snapshot_count: snapshotCount,
-    seed_count: seedCount,
+    snapshot_count: snapshots.length,
+    seed_count: seeds.length,
     adapter_type: manifest.metadata?.adapter_type || undefined,
   }
 }
diff --git a/packages/opencode/src/altimate/native/dbt/unit-tests.ts b/packages/opencode/src/altimate/native/dbt/unit-tests.ts
@@ -73,7 +73,7 @@ interface UpstreamDep {
   schema_name?: string
   database?: string
   description?: string
-  resource_type: "model" | "source"
+  resource_type: "model" | "source" | "seed" | "snapshot"
   materialized?: string
   columns: ModelColumn[]
 }
@@ -82,23 +82,30 @@ function resolveUpstream(
   upstreamIds: string[],
   models: DbtModelInfo[],
   sources: DbtSourceInfo[],
+  seeds: DbtModelInfo[],
+  snapshots: DbtModelInfo[],
 ): UpstreamDep[] {
-  const modelMap = new Map(models.map((m) => [m.unique_id, m]))
+  // Map each unique_id to its info + resource_type.
+  // Seeds, snapshots, and models all use ref() so they share handling.
+  const typedMap = new Map<string, { info: DbtModelInfo; kind: "model" | "seed" | "snapshot" }>()
+  for (const m of models) typedMap.set(m.unique_id, { info: m, kind: "model" })
+  for (const s of seeds) typedMap.set(s.unique_id, { info: s, kind: "seed" })
+  for (const s of snapshots) typedMap.set(s.unique_id, { info: s, kind: "snapshot" })
   const sourceMap = new Map(sources.map((s) => [s.unique_id, s]))
 
   const result: UpstreamDep[] = []
   for (const uid of upstreamIds) {
-    const model = modelMap.get(uid)
-    if (model) {
+    const entry = typedMap.get(uid)
+    if (entry) {
       result.push({
         unique_id: uid,
-        name: model.name,
-        schema_name: model.schema_name,
-        database: model.database,
-        description: model.description,
-        resource_type: "model",
-        materialized: model.materialized,
-        columns: model.columns,
+        name: entry.info.name,
+        schema_name: entry.info.schema_name,
+        database: entry.info.database,
+        description: entry.info.description,
+        resource_type: entry.kind,
+        materialized: entry.info.materialized,
+        columns: entry.info.columns,
       })
       continue
     }
@@ -120,6 +127,7 @@ function resolveUpstream(
 }
 
 function depRef(dep: UpstreamDep): string {
+  // Models, seeds, and snapshots all use ref(); only sources use source()
   return dep.resource_type === "source"
     ? `source('${dep.source_name}', '${dep.name}')`
     : `ref('${dep.name}')`
@@ -204,8 +212,10 @@ export async function generateDbtUnitTests(
     warnings.push("Column lineage analysis failed — generating tests without lineage context")
   }
 
-  // 5. Resolve upstream deps from parsed manifest data (no raw nodes access)
-  const upstreamDeps = resolveUpstream(model.depends_on, manifest.models, manifest.sources)
+  // 5. Resolve upstream deps (models, sources, seeds, snapshots)
+  const upstreamDeps = resolveUpstream(
+    model.depends_on, manifest.models, manifest.sources, manifest.seeds, manifest.snapshots,
+  )
   const materialized = model.materialized || "view"
 
   // 6. Enrich columns from warehouse (parallel, best-effort)
@@ -320,8 +330,13 @@ interface Scenario {
  * directly for nuanced logic analysis. This just determines the scaffold.
  */
 function detectScenarios(sql: string, materialized: string): Scenario[] {
-  // Strip SQL comments to avoid false positives (e.g., "-- old/code" matching division)
-  const cleaned = sql.replace(/--.*$/gm, "").replace(/\/\*[\s\S]*?\*\//g, "")
+  // Strip SQL comments AND string literals to avoid false positives
+  // (e.g., "-- old/code", "'2024/01/15'", "'a/b'" matching division).
+  const cleaned = sql
+    .replace(/--.*$/gm, "")                  // line comments
+    .replace(/\/\*[\s\S]*?\*\//g, "")        // block comments
+    .replace(/'(?:[^'\\]|\\.|'')*'/g, "''")  // single-quoted strings
+    .replace(/"(?:[^"\\]|\\.|"")*"/g, '""')  // double-quoted identifiers/strings
   const upper = cleaned.toUpperCase()
   const scenarios: Scenario[] = [
     { category: "happy_path", description: "Verify correct output for standard input data", mockStyle: "happy_path", rowCount: 2 },
@@ -406,9 +421,14 @@ function buildTests(
   maxScenarios: number,
 ): UnitTestCase[] {
   return scenarios.slice(0, maxScenarios).map((scenario, idx) => {
-    const testName = sanitizeName(
-      `test_${modelName}_${scenario.category}${idx > 0 ? `_${idx}` : ""}`,
-    )
+    // Build the scenario suffix first, then truncate the model-name portion
+    // so the suffix is always preserved (prevents collisions for long names).
+    const suffix = `_${scenario.category}${idx > 0 ? `_${idx}` : ""}`
+    const prefix = "test_"
+    const maxLen = 64
+    const modelBudget = maxLen - prefix.length - suffix.length
+    const truncatedModel = modelName.length > modelBudget ? modelName.slice(0, Math.max(1, modelBudget)) : modelName
+    const testName = sanitizeName(`${prefix}${truncatedModel}${suffix}`)
 
     const given: UnitTestMockInput[] = deps.map((dep) => {
       const input = depRef(dep)
diff --git a/packages/opencode/src/altimate/native/types.ts b/packages/opencode/src/altimate/native/types.ts
@@ -200,6 +200,10 @@ export interface DbtManifestResult {
   models: DbtModelInfo[]
   sources: DbtSourceInfo[]
   tests: DbtTestInfo[]
+  /** Seeds parsed from the manifest (extracted like models for ref() resolution) */
+  seeds: DbtModelInfo[]
+  /** Snapshots parsed from the manifest (extracted like models for ref() resolution) */
+  snapshots: DbtModelInfo[]
   source_count: number
   model_count: number
   test_count: number
diff --git a/packages/opencode/src/tool/registry.ts b/packages/opencode/src/tool/registry.ts
@@ -47,7 +47,9 @@ import { WarehouseDiscoverTool } from "../altimate/tools/warehouse-discover"
 import { McpDiscoverTool } from "../altimate/tools/mcp-discover"
 
 import { DbtManifestTool } from "../altimate/tools/dbt-manifest"
+// altimate_change start - import dbt unit test generation tool
 import { DbtUnitTestGenTool } from "../altimate/tools/dbt-unit-test-gen"
+// altimate_change end
 import { DbtProfilesTool } from "../altimate/tools/dbt-profiles"
 import { DbtLineageTool } from "../altimate/tools/dbt-lineage"
 import { SchemaIndexTool } from "../altimate/tools/schema-index"
@@ -224,7 +226,9 @@ export namespace ToolRegistry {
       // altimate_change end
 
       DbtManifestTool,
+      // altimate_change start - register dbt unit test generation tool
       DbtUnitTestGenTool,
+      // altimate_change end
       DbtProfilesTool,
       DbtLineageTool,
       SchemaIndexTool,
diff --git a/packages/opencode/test/altimate/dbt-unit-test-gen.test.ts b/packages/opencode/test/altimate/dbt-unit-test-gen.test.ts