|
/**
 * Tests for CTE wrapping and injection in SQL-query mode.
 *
 * The tricky case is cross-warehouse comparison where source and target are both
 * SQL queries referencing tables that only exist on their own side. The combined
 * CTE prefix cannot be sent to both warehouses because T-SQL / Fabric parse-bind
 * every CTE body even when unreferenced — the "other side" CTE would fail to
 * resolve its base table.
 */
| 10 | +import { describe, test, expect } from "bun:test" |
| 11 | + |
| 12 | +import { resolveTableSources, injectCte } from "../../src/altimate/native/connections/data-diff" |
| 13 | + |
/**
 * Expectations for resolveTableSources: plain table names come back untouched
 * with null CTE prefixes, while SQL-query inputs are exposed through the
 * __diff_source / __diff_target aliases together with a combined prefix and
 * one prefix per side.
 */
describe("resolveTableSources", () => {
  // Bracket-quoted tables that stand in for the source-only and target-only sides.
  const sourceTable = "[TRANSFORMED].[DimCustomer]"
  const targetTable = "[gold].[dim_customer]"

  test("plain table names pass through without wrapping", () => {
    const { table1Name, table2Name, ctePrefix, sourceCtePrefix, targetCtePrefix } = resolveTableSources(
      "orders",
      "orders_v2",
    )
    expect(table1Name).toBe("orders")
    expect(table2Name).toBe("orders_v2")
    expect(ctePrefix).toBeNull()
    expect(sourceCtePrefix).toBeNull()
    expect(targetCtePrefix).toBeNull()
  })

  test("schema-qualified plain names pass through", () => {
    const { table1Name, table2Name, ctePrefix } = resolveTableSources(
      "gold.dim_customer",
      "TRANSFORMED.DimCustomer",
    )
    expect(table1Name).toBe("gold.dim_customer")
    expect(table2Name).toBe("TRANSFORMED.DimCustomer")
    expect(ctePrefix).toBeNull()
  })

  test("both queries are wrapped in CTEs with aliases", () => {
    const { table1Name, table2Name, ctePrefix } = resolveTableSources(
      `SELECT id, val FROM ${sourceTable}`,
      `SELECT id, val FROM ${targetTable}`,
    )
    expect(table1Name).toBe("__diff_source")
    expect(table2Name).toBe("__diff_target")

    // The combined prefix must define both aliases and keep both base tables.
    const expectedFragments = ["__diff_source AS (", "__diff_target AS (", sourceTable, targetTable]
    for (const fragment of expectedFragments) {
      expect(ctePrefix).toContain(fragment)
    }
  })

  test("side-specific prefixes contain only the relevant CTE", () => {
    const { sourceCtePrefix, targetCtePrefix } = resolveTableSources(
      `SELECT id FROM ${sourceTable}`,
      `SELECT id FROM ${targetTable}`,
    )

    // Source side: defines the source CTE and carries nothing from the target side.
    expect(sourceCtePrefix).toContain("__diff_source AS (")
    expect(sourceCtePrefix).toContain(sourceTable)
    expect(sourceCtePrefix).not.toContain("__diff_target")
    expect(sourceCtePrefix).not.toContain(targetTable)

    // Target side: the mirror image, with no trace of the source side.
    expect(targetCtePrefix).toContain("__diff_target AS (")
    expect(targetCtePrefix).toContain(targetTable)
    expect(targetCtePrefix).not.toContain("__diff_source")
    expect(targetCtePrefix).not.toContain(sourceTable)
  })

  test("mixed: plain source + query target still wraps both sides", () => {
    const plainSource = "orders"
    const queryTarget = "SELECT * FROM other.orders WHERE region = 'EU'"
    const { table1Name, table2Name, sourceCtePrefix, targetCtePrefix } = resolveTableSources(
      plainSource,
      queryTarget,
    )
    expect(table1Name).toBe("__diff_source")
    expect(table2Name).toBe("__diff_target")
    // The plain name is expected to be wrapped as a SELECT over an ANSI double-quoted identifier.
    expect(sourceCtePrefix).toContain('SELECT * FROM "orders"')
    expect(targetCtePrefix).toContain("other.orders")
  })

  test("query detection requires both keyword AND whitespace", () => {
    // A bare keyword with nothing after it is a table name ("select"), not a query.
    const { table1Name, table2Name, ctePrefix } = resolveTableSources("select", "with")
    expect(table1Name).toBe("select")
    expect(table2Name).toBe("with")
    expect(ctePrefix).toBeNull()
  })
})
| 82 | + |
/**
 * Expectations for injectCte: a CTE prefix is prepended to plain SELECTs,
 * merged into an existing engine-emitted WITH clause (our CTE first), and a
 * side-specific prefix never drags the other side's table into the SQL.
 */
describe("injectCte", () => {
  // Queries whose base tables are treated as existing only on their own side.
  const sourceQuery = "SELECT id FROM [TRANSFORMED].[DimCustomer]"
  const targetQuery = "SELECT id FROM [gold].[dim_customer]"

  test("prepends CTE prefix to a plain SELECT", () => {
    const prefix = "WITH __diff_source AS (\nSELECT 1 AS id\n)"
    const sql = "SELECT COUNT(*) FROM __diff_source"
    const out = injectCte(sql, prefix)
    expect(out.startsWith(prefix)).toBe(true)
    expect(out).toContain("SELECT COUNT(*) FROM __diff_source")
  })

  test("merges with an engine-emitted WITH clause", () => {
    const prefix = "WITH __diff_source AS (\nSELECT * FROM base\n)"
    const engineSql = "WITH engine_cte AS (SELECT id FROM __diff_source) SELECT * FROM engine_cte"
    const out = injectCte(engineSql, prefix)

    // Must start with a single WITH, with our CTE first, then engine's
    expect(out.match(/^WITH /)).not.toBeNull()
    expect((out.match(/\bWITH\b/g) ?? []).length).toBe(1)

    // Both definitions must actually be present before their order is compared.
    // indexOf yields -1 on a miss, so without these checks an output that dropped
    // our CTE entirely (e.g. the engine SQL returned unchanged) would satisfy
    // "-1 < indexOf(engine_cte)" and the test would pass vacuously.
    expect(out).toContain("__diff_source AS")
    expect(out).toContain("engine_cte AS")
    expect(out.indexOf("__diff_source AS")).toBeLessThan(out.indexOf("engine_cte AS"))
  })

  test("side-specific injection: source prefix does not leak target refs", () => {
    // Simulates cross-warehouse fp1_1 task going to MSSQL. It must not see any
    // reference to the Fabric-only target table, since MSSQL parse-binds every
    // CTE body.
    const { sourceCtePrefix } = resolveTableSources(sourceQuery, targetQuery)
    // Fail loudly here instead of passing a missing prefix on to injectCte.
    if (sourceCtePrefix == null) {
      throw new Error("expected resolveTableSources to return a source CTE prefix for SQL-query inputs")
    }
    const engineFp1Sql =
      "SELECT COUNT(*), SUM(CAST(...HASHBYTES('MD5', CONCAT(CAST([id] AS NVARCHAR(MAX))))...)) FROM [__diff_source]"
    const sqlForMssql = injectCte(engineFp1Sql, sourceCtePrefix)
    expect(sqlForMssql).toContain("[TRANSFORMED].[DimCustomer]")
    expect(sqlForMssql).not.toContain("[gold].[dim_customer]")
    expect(sqlForMssql).not.toContain("__diff_target")
  })

  test("side-specific injection: target prefix does not leak source refs", () => {
    const { targetCtePrefix } = resolveTableSources(sourceQuery, targetQuery)
    // Fail loudly here instead of passing a missing prefix on to injectCte.
    if (targetCtePrefix == null) {
      throw new Error("expected resolveTableSources to return a target CTE prefix for SQL-query inputs")
    }
    const engineFp2Sql = "SELECT COUNT(*) FROM [__diff_target]"
    const sqlForFabric = injectCte(engineFp2Sql, targetCtePrefix)
    expect(sqlForFabric).toContain("[gold].[dim_customer]")
    expect(sqlForFabric).not.toContain("[TRANSFORMED].[DimCustomer]")
    expect(sqlForFabric).not.toContain("__diff_source")
  })
})
0 commit comments