Skip to content

Commit ccac8f4

Browse files
committed
test: dbt helpers — direct unit tests for shared manifest utilities
The 7 exported functions in src/altimate/native/dbt/helpers.ts power dbt-lineage, dbt-unit-test-gen, and dbt-manifest handlers but had zero direct unit tests. A silent regression in findModel or detectDialect cascades across multiple dbt tools. 32 new tests cover model lookup, dialect mapping, schema context building, column extraction, and manifest caching/invalidation. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> https://claude.ai/code/session_01G3L4LeyQGJ2ox4ssaqb3Kb
1 parent f030bf8 commit ccac8f4

1 file changed

Lines changed: 305 additions & 0 deletions

File tree

Lines changed: 305 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,305 @@
/**
 * Direct unit tests for dbt native helper functions in
 * src/altimate/native/dbt/helpers.ts.
 *
 * These pure functions power dbt-lineage, dbt-unit-test-gen, and
 * dbt-manifest handlers. Previously only tested indirectly through
 * dbtLineage() in dbt-lineage-helpers.test.ts. Direct tests catch
 * regressions in isolation: a broken findModel or detectDialect
 * silently degrades multiple downstream tools.
 */
12+
import { describe, test, expect, beforeEach, afterEach } from "bun:test"
13+
import * as fs from "fs"
14+
import * as path from "path"
15+
import * as os from "os"
16+
17+
import {
18+
loadRawManifest,
19+
findModel,
20+
getUniqueId,
21+
detectDialect,
22+
buildSchemaContext,
23+
extractColumns,
24+
listModelNames,
25+
} from "../../src/altimate/native/dbt/helpers"
26+
27+
// ---------- findModel ----------
28+
29+
describe("findModel", () => {
30+
const nodes: Record<string, any> = {
31+
"model.project.orders": { resource_type: "model", name: "orders" },
32+
"model.project.users": { resource_type: "model", name: "users" },
33+
"source.project.raw.events": { resource_type: "source", name: "events" },
34+
"test.project.not_null": { resource_type: "test", name: "not_null" },
35+
}
36+
37+
test("finds model by exact unique_id key", () => {
38+
expect(findModel(nodes, "model.project.orders")).toEqual(nodes["model.project.orders"])
39+
})
40+
41+
test("finds model by name when unique_id does not match", () => {
42+
expect(findModel(nodes, "users")).toEqual(nodes["model.project.users"])
43+
})
44+
45+
test("returns null for source nodes (not resource_type=model)", () => {
46+
expect(findModel(nodes, "events")).toBeNull()
47+
})
48+
49+
test("returns null for nonexistent model", () => {
50+
expect(findModel(nodes, "nonexistent")).toBeNull()
51+
})
52+
53+
test("returns null for empty nodes", () => {
54+
expect(findModel({}, "orders")).toBeNull()
55+
})
56+
57+
test("returns a model when multiple models share the same name", () => {
58+
const dupes: Record<string, any> = {
59+
"model.a.orders": { resource_type: "model", name: "orders" },
60+
"model.b.orders": { resource_type: "model", name: "orders" },
61+
}
62+
const result = findModel(dupes, "orders")
63+
expect(result).not.toBeNull()
64+
expect(result.resource_type).toBe("model")
65+
})
66+
})
67+
68+
// ---------- getUniqueId ----------
69+
70+
describe("getUniqueId", () => {
71+
const nodes: Record<string, any> = {
72+
"model.project.orders": { resource_type: "model", name: "orders" },
73+
"source.project.raw.events": { resource_type: "source", name: "events" },
74+
}
75+
76+
test("returns key when exact unique_id exists and is a model", () => {
77+
expect(getUniqueId(nodes, "model.project.orders")).toBe("model.project.orders")
78+
})
79+
80+
test("returns unique_id when looked up by name", () => {
81+
expect(getUniqueId(nodes, "orders")).toBe("model.project.orders")
82+
})
83+
84+
test("returns undefined for source node (not resource_type=model)", () => {
85+
expect(getUniqueId(nodes, "events")).toBeUndefined()
86+
})
87+
88+
test("returns undefined for nonexistent model", () => {
89+
expect(getUniqueId(nodes, "nonexistent")).toBeUndefined()
90+
})
91+
})
92+
93+
// ---------- detectDialect ----------
94+
95+
describe("detectDialect", () => {
96+
test("maps known adapter types to dialect strings", () => {
97+
const cases: Array<[string, string]> = [
98+
["snowflake", "snowflake"],
99+
["bigquery", "bigquery"],
100+
["databricks", "databricks"],
101+
["spark", "spark"],
102+
["postgres", "postgres"],
103+
["redshift", "redshift"],
104+
["duckdb", "duckdb"],
105+
["clickhouse", "clickhouse"],
106+
["mysql", "mysql"],
107+
["sqlserver", "tsql"],
108+
["trino", "trino"],
109+
]
110+
for (const [adapter, expected] of cases) {
111+
expect(detectDialect({ metadata: { adapter_type: adapter } })).toBe(expected)
112+
}
113+
})
114+
115+
test("returns unmapped adapter type verbatim (truthy passthrough)", () => {
116+
expect(detectDialect({ metadata: { adapter_type: "athena" } })).toBe("athena")
117+
})
118+
119+
test("defaults to 'snowflake' when no metadata", () => {
120+
expect(detectDialect({})).toBe("snowflake")
121+
})
122+
123+
test("defaults to 'snowflake' when adapter_type is empty string", () => {
124+
expect(detectDialect({ metadata: { adapter_type: "" } })).toBe("snowflake")
125+
})
126+
127+
test("defaults to 'snowflake' when metadata is null", () => {
128+
expect(detectDialect({ metadata: null })).toBe("snowflake")
129+
})
130+
})
131+
132+
// ---------- buildSchemaContext ----------
133+
134+
describe("buildSchemaContext", () => {
135+
const nodes: Record<string, any> = {
136+
"model.project.upstream_a": {
137+
resource_type: "model",
138+
name: "upstream_a",
139+
alias: "upstream_alias",
140+
columns: {
141+
id: { name: "id", data_type: "INTEGER" },
142+
name: { name: "name", data_type: "VARCHAR" },
143+
},
144+
},
145+
"model.project.upstream_b": {
146+
resource_type: "model",
147+
name: "upstream_b",
148+
columns: {},
149+
},
150+
}
151+
const sources: Record<string, any> = {
152+
"source.project.raw.events": {
153+
name: "events",
154+
columns: {
155+
event_id: { name: "event_id", data_type: "BIGINT" },
156+
},
157+
},
158+
}
159+
160+
test("builds schema context using alias over name", () => {
161+
const result = buildSchemaContext(nodes, sources, ["model.project.upstream_a"])
162+
expect(result).not.toBeNull()
163+
expect(result!.version).toBe("1")
164+
// Alias takes precedence over name
165+
expect(result!.tables["upstream_alias"]).toBeDefined()
166+
expect(result!.tables["upstream_alias"].columns).toHaveLength(2)
167+
// Name key must NOT exist when alias is present
168+
expect(result!.tables["upstream_a"]).toBeUndefined()
169+
})
170+
171+
test("skips upstream models with empty columns", () => {
172+
const result = buildSchemaContext(nodes, sources, ["model.project.upstream_b"])
173+
expect(result).toBeNull()
174+
})
175+
176+
test("resolves upstream IDs from sources", () => {
177+
const result = buildSchemaContext(nodes, sources, ["source.project.raw.events"])
178+
expect(result).not.toBeNull()
179+
expect(result!.tables["events"]).toBeDefined()
180+
expect(result!.tables["events"].columns).toEqual([
181+
{ name: "event_id", type: "BIGINT" },
182+
])
183+
})
184+
185+
test("returns null when no upstream IDs provided", () => {
186+
expect(buildSchemaContext(nodes, sources, [])).toBeNull()
187+
})
188+
189+
test("returns null when upstream IDs do not resolve", () => {
190+
expect(buildSchemaContext(nodes, sources, ["model.project.ghost"])).toBeNull()
191+
})
192+
})
193+
194+
// ---------- extractColumns ----------
195+
196+
describe("extractColumns", () => {
197+
test("extracts column with data_type and description", () => {
198+
const dict = {
199+
id: { name: "id", data_type: "INTEGER", description: "Primary key" },
200+
}
201+
const cols = extractColumns(dict)
202+
expect(cols).toHaveLength(1)
203+
expect(cols[0]).toEqual({ name: "id", data_type: "INTEGER", description: "Primary key" })
204+
})
205+
206+
test("falls back to 'type' field when data_type is missing", () => {
207+
const dict = {
208+
name: { name: "name", type: "VARCHAR" },
209+
}
210+
const cols = extractColumns(dict)
211+
expect(cols).toHaveLength(1)
212+
expect(cols[0].name).toBe("name")
213+
expect(cols[0].data_type).toBe("VARCHAR")
214+
expect(cols[0].description).toBeUndefined()
215+
})
216+
217+
test("uses dict key as column name when col.name is missing", () => {
218+
const dict = { amount: { data_type: "DECIMAL" } }
219+
const cols = extractColumns(dict)
220+
expect(cols[0].name).toBe("amount")
221+
})
222+
223+
test("returns empty array for empty dict", () => {
224+
expect(extractColumns({})).toEqual([])
225+
})
226+
})
227+
228+
// ---------- listModelNames ----------
229+
230+
describe("listModelNames", () => {
231+
test("returns only model names, excluding sources and tests", () => {
232+
const nodes: Record<string, any> = {
233+
"model.p.a": { resource_type: "model", name: "alpha" },
234+
"source.p.b": { resource_type: "source", name: "beta" },
235+
"model.p.c": { resource_type: "model", name: "gamma" },
236+
"test.p.d": { resource_type: "test", name: "delta" },
237+
}
238+
const names = listModelNames(nodes)
239+
expect(names).toEqual(["alpha", "gamma"])
240+
})
241+
242+
test("returns empty array for no models", () => {
243+
expect(listModelNames({})).toEqual([])
244+
})
245+
})
246+
247+
// ---------- loadRawManifest ----------
248+
249+
describe("loadRawManifest", () => {
250+
let tmpDir: string
251+
252+
beforeEach(() => {
253+
tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "dbt-helpers-test-"))
254+
})
255+
256+
afterEach(() => {
257+
fs.rmSync(tmpDir, { recursive: true, force: true })
258+
})
259+
260+
test("returns null for non-existent file", () => {
261+
expect(loadRawManifest(path.join(tmpDir, "nonexistent.json"))).toBeNull()
262+
})
263+
264+
test("parses valid manifest file", () => {
265+
const manifestPath = path.join(tmpDir, "manifest.json")
266+
fs.writeFileSync(manifestPath, JSON.stringify({ nodes: {}, metadata: { adapter_type: "snowflake" } }))
267+
const result = loadRawManifest(manifestPath)
268+
expect(result).not.toBeNull()
269+
expect(result.metadata.adapter_type).toBe("snowflake")
270+
})
271+
272+
test("throws on invalid JSON", () => {
273+
const manifestPath = path.join(tmpDir, "bad.json")
274+
fs.writeFileSync(manifestPath, "not json {{{")
275+
expect(() => loadRawManifest(manifestPath)).toThrow()
276+
})
277+
278+
test("throws when manifest is a primitive (not an object)", () => {
279+
// typeof 42 === "number", triggers the non-object guard
280+
const manifestPath = path.join(tmpDir, "number.json")
281+
fs.writeFileSync(manifestPath, "42")
282+
expect(() => loadRawManifest(manifestPath)).toThrow("Manifest is not a JSON object")
283+
})
284+
285+
test("caches by path+mtime (same reference returned)", () => {
286+
const manifestPath = path.join(tmpDir, "cached.json")
287+
fs.writeFileSync(manifestPath, JSON.stringify({ v: 1 }))
288+
const first = loadRawManifest(manifestPath)
289+
const second = loadRawManifest(manifestPath)
290+
// Same object reference from cache
291+
expect(first).toBe(second)
292+
})
293+
294+
test("invalidates cache when file content is rewritten", () => {
295+
const manifestPath = path.join(tmpDir, "updated.json")
296+
fs.writeFileSync(manifestPath, JSON.stringify({ v: 1 }))
297+
const first = loadRawManifest(manifestPath)
298+
299+
// Rewrite — OS updates mtime to current time, which differs from
300+
// the first write's mtime (millisecond-resolution on modern Linux).
301+
fs.writeFileSync(manifestPath, JSON.stringify({ v: 2 }))
302+
const second = loadRawManifest(manifestPath)
303+
expect(second.v).toBe(2)
304+
})
305+
})

0 commit comments

Comments
 (0)