Skip to content

Commit 5def236

Browse files
ystemsrx and claude committed
fix(parser): handle SQL Server GO, PG partitions, line comments, schema names
Four bugs surfaced from the multi-dialect test.sql stress file:

- `--` line comments were never stripped (typo: a single character was compared to the two-character string "--"), which silently dropped any CREATE TABLE that followed a comment line. The `"select"` table with quoted reserved-word columns was the visible casualty.
- The T-SQL batch separator `GO` glued the next CREATE TABLE onto the prior statement, so `crm.Account` and `crm.Invoice` (plus their FK) never reached the graph. Standalone `GO` lines are now rewritten to `;` after comment stripping.
- PostgreSQL `PARTITION OF` children (`pg_orders_2026_q1`, `pg_orders_default`) were producing empty floating nodes -- they're physical shards of the parent, not independent entities, so skip them.
- Schema was being stripped from table names, collapsing `app.customer` and `crm.customer` into one node. Table names and FK targets now keep the qualified form (`schema.table`) end-to-end.

Tests: refresh the existing public.Article assertion and add seven regressions covering each issue above plus generated-column expressions with commas and single-PK + inline REFERENCES 1:1 inference.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 709d29b commit 5def236

2 files changed

Lines changed: 221 additions & 10 deletions

File tree

src/parser/sql.ts

Lines changed: 29 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -37,7 +37,7 @@ const stripSqlComments = (src: string) => {
3737
out += src[i++];
3838
}
3939
}
40-
} else if (ch === "-" && src[i + 1] === "--") {
40+
} else if (ch === "-" && src[i + 1] === "-") {
4141
while (i < src.length && src[i] !== "\n") i++;
4242
} else if (ch === "/" && src[i + 1] === "*") {
4343
i += 2;
@@ -100,14 +100,31 @@ const splitStatements = (sql: string) => {
100100
return statements;
101101
};
102102

103-
const cleanIdentifier = (raw: string) => {
104-
const parts = raw
103+
// 拆解一个可能带 schema 前缀的标识符为各段裸名(去引号 / 去反引号 / 去方括号)。
104+
const splitIdentifierParts = (raw: string) =>
105+
raw
105106
.split(".")
106107
.map((p) => p.trim().replace(/^[`"\[]|[`"\]]$/g, ""))
107108
.filter(Boolean);
109+
110+
// 仅取最末段的裸名(用于列名 —— 列名不会有 schema 前缀)。
111+
const cleanIdentifier = (raw: string) => {
112+
const parts = splitIdentifierParts(raw);
108113
return parts[parts.length - 1] || raw.trim();
109114
};
110115

116+
// 保留 schema 的限定名(用于表名与 FK 目标):`"app"."customer"` -> `app.customer`。
117+
// 不同 schema 下同名表才不会塌成同一个节点。
118+
const qualifiedIdentifier = (raw: string) => {
119+
const parts = splitIdentifierParts(raw);
120+
return parts.length ? parts.join(".") : raw.trim();
121+
};
122+
123+
// T-SQL 批处理分隔符 GO 单独成行时把它换成 `;`,让后续按 `;` 切分能识别两边。
124+
// 必须在去掉块/行注释之后做,否则会误伤注释里的 GO。
125+
const normalizeBatchSeparators = (sql: string) =>
126+
sql.replace(/^[\t ]*GO[\t ]*(?:\r?\n|$)/gim, ";\n");
127+
111128
const splitTopLevelComma = (body: string) => {
112129
const parts: string[] = [];
113130
let current = "";
@@ -182,7 +199,7 @@ const parseColumnType = (rest: string) => {
182199
export const parseSQLTables = (sql: string): ParseResult => {
183200
const tables: ParsedTable[] = [];
184201
const relationships: ParsedRelationship[] = [];
185-
const cleanSql = stripSqlComments(sql).trim();
202+
const cleanSql = normalizeBatchSeparators(stripSqlComments(sql)).trim();
186203

187204
splitStatements(cleanSql).forEach((statement) => {
188205
if (!/^\s*CREATE\s+(?:TEMP(?:ORARY)?\s+)?TABLE/i.test(statement)) return;
@@ -194,7 +211,12 @@ export const parseSQLTables = (sql: string): ParseResult => {
194211
),
195212
);
196213
if (!tableNameMatch) return;
197-
const tableName = cleanIdentifier(tableNameMatch[1]);
214+
const tableName = qualifiedIdentifier(tableNameMatch[1]);
215+
216+
// PostgreSQL 分区子表(`PARTITION OF parent ...`)不是独立实体 —— 它的列、PK、
217+
// FK 都继承自父表。强行解析会得到一个空节点漂在图上,干扰阅读。
218+
if (/\bPARTITION\s+OF\b/i.test(statement)) return;
219+
198220
const tableBody = extractMainBody(statement);
199221
if (!tableBody) return;
200222

@@ -223,7 +245,7 @@ export const parseSQLTables = (sql: string): ParseResult => {
223245
if (fkMatch) {
224246
foreignKeys.push({
225247
column: cleanIdentifier(fkMatch[1]),
226-
referencedTable: cleanIdentifier(fkMatch[2]),
248+
referencedTable: qualifiedIdentifier(fkMatch[2]),
227249
referencedColumn: cleanIdentifier(fkMatch[3]),
228250
});
229251
return;
@@ -254,7 +276,7 @@ export const parseSQLTables = (sql: string): ParseResult => {
254276
if (inlineRef) {
255277
foreignKeys.push({
256278
column: columnName,
257-
referencedTable: cleanIdentifier(inlineRef[1]),
279+
referencedTable: qualifiedIdentifier(inlineRef[1]),
258280
referencedColumn: cleanIdentifier(inlineRef[2]),
259281
});
260282
}

src/test/parser-sql.test.ts

Lines changed: 192 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -43,7 +43,7 @@ describe("parseSQLTables", () => {
4343
);
4444
`);
4545

46-
expect(result.tables[0].name).toBe("Article");
46+
expect(result.tables[0].name).toBe("public.Article");
4747
expect(result.tables[0].columns.map((c) => [c.name, c.type])).toEqual([
4848
["ArticleID", "UUID"],
4949
["AuthorID", "UUID"],
@@ -52,8 +52,8 @@ describe("parseSQLTables", () => {
5252
]);
5353
expect(result.relationships).toEqual([
5454
{
55-
from: "Article",
56-
to: "User",
55+
from: "public.Article",
56+
to: "public.User",
5757
label: "AuthorID",
5858
fromCardinality: "N",
5959
toCardinality: "1",
@@ -158,4 +158,193 @@ describe("parseSQLTables", () => {
158158
"email",
159159
]);
160160
});
161+
162+
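// Regression: stripSqlComments used to compare the single character `src[i + 1]` against the two-character string "--", which can never match,
163+
// so line comments were never stripped and a CREATE TABLE immediately following a comment line was rejected entirely.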
164+
it("strips -- line comments so trailing CREATE TABLE is parsed", () => {
165+
const result = parseSQLTables(`
166+
-- prelude noise
167+
-- another line that mentions CREATE TABLE on the side
168+
CREATE TABLE solo (
169+
id INT PRIMARY KEY
170+
);
171+
`);
172+
expect(result.tables).toHaveLength(1);
173+
expect(result.tables[0].name).toBe("solo");
174+
});
175+
176+
// Regression: 引号包裹的保留字在前一段以 `--` 注释结尾时会丢失。
177+
it("parses quoted reserved-word identifiers (\"select\", \"from\", \"primary\")", () => {
178+
const result = parseSQLTables(`
179+
-- Reserved-ish identifiers
180+
CREATE TABLE "select" (
181+
"from" INTEGER NOT NULL,
182+
"where" TEXT,
183+
"group" TEXT,
184+
CONSTRAINT "primary" PRIMARY KEY ("from")
185+
);
186+
`);
187+
expect(result.tables).toHaveLength(1);
188+
expect(result.tables[0].name).toBe("select");
189+
expect(result.tables[0].columns.map((c) => c.name)).toEqual([
190+
"from",
191+
"where",
192+
"group",
193+
]);
194+
expect(result.tables[0].primaryKeys).toEqual(["from"]);
195+
});
196+
197+
// T-SQL 批处理分隔符 GO 不是合法的 ANSI SQL token,但在 SQL Server 脚本里
198+
// 极常见。如果 splitter 不识别,GO 之后的 CREATE TABLE 会被并入上一条语句、
199+
// 整段被丢弃 —— 之前 crm.Account / crm.Invoice 就是这样消失的。
200+
it("treats SQL Server GO as a batch separator", () => {
201+
const result = parseSQLTables(`
202+
IF SCHEMA_ID(N'crm') IS NULL
203+
EXEC(N'CREATE SCHEMA crm');
204+
GO
205+
206+
CREATE TABLE crm.[Account] (
207+
account_id BIGINT NOT NULL,
208+
[name] NVARCHAR(200) NOT NULL,
209+
CONSTRAINT pk_account PRIMARY KEY CLUSTERED (account_id)
210+
);
211+
GO
212+
213+
CREATE TABLE crm.Invoice (
214+
invoice_id BIGINT NOT NULL,
215+
account_id BIGINT NOT NULL,
216+
CONSTRAINT pk_invoice PRIMARY KEY (invoice_id),
217+
CONSTRAINT fk_invoice_account
218+
FOREIGN KEY (account_id)
219+
REFERENCES crm.[Account] (account_id)
220+
);
221+
GO
222+
`);
223+
expect(result.tables.map((t) => t.name)).toEqual([
224+
"crm.Account",
225+
"crm.Invoice",
226+
]);
227+
expect(result.relationships).toEqual([
228+
{
229+
from: "crm.Invoice",
230+
to: "crm.Account",
231+
label: "account_id",
232+
fromCardinality: "N",
233+
toCardinality: "1",
234+
},
235+
]);
236+
});
237+
238+
// PG 的分区子表只是父表的物理分片,没有自己的列定义。强行解析会得到一个
239+
// 空节点漂在图上 —— 直接跳过。
240+
it("skips PostgreSQL PARTITION OF child tables", () => {
241+
const result = parseSQLTables(`
242+
CREATE TABLE app.pg_orders (
243+
order_id BIGINT NOT NULL,
244+
placed_at TIMESTAMPTZ NOT NULL,
245+
CONSTRAINT pk_pg_orders PRIMARY KEY (order_id)
246+
) PARTITION BY RANGE (placed_at);
247+
248+
CREATE TABLE app.pg_orders_2026_q1
249+
PARTITION OF app.pg_orders
250+
FOR VALUES FROM ('2026-01-01') TO ('2026-04-01');
251+
252+
CREATE TABLE app.pg_orders_default
253+
PARTITION OF app.pg_orders
254+
DEFAULT;
255+
`);
256+
expect(result.tables.map((t) => t.name)).toEqual(["app.pg_orders"]);
257+
});
258+
259+
// schema 必须留在表名里,否则 app.customer 与 crm.customer 会塌成同一节点。
260+
// FK 目标也得带 schema,连线才能命中正确的实体。
261+
it("preserves schema on table names and FK targets to disambiguate same-named tables", () => {
262+
const result = parseSQLTables(`
263+
CREATE TABLE app.customer (
264+
customer_id BIGINT PRIMARY KEY,
265+
email VARCHAR(320)
266+
);
267+
268+
CREATE TABLE crm.customer (
269+
customer_id BIGINT PRIMARY KEY,
270+
account_id BIGINT NOT NULL,
271+
CONSTRAINT fk_crm_customer_account
272+
FOREIGN KEY (account_id) REFERENCES crm.account (account_id)
273+
);
274+
275+
CREATE TABLE app.address (
276+
address_id BIGINT PRIMARY KEY,
277+
customer_id BIGINT NOT NULL,
278+
CONSTRAINT fk_address_customer
279+
FOREIGN KEY (customer_id) REFERENCES "app"."customer" (customer_id)
280+
);
281+
`);
282+
expect(result.tables.map((t) => t.name)).toEqual([
283+
"app.customer",
284+
"crm.customer",
285+
"app.address",
286+
]);
287+
expect(result.relationships).toEqual([
288+
{
289+
from: "crm.customer",
290+
to: "crm.account",
291+
label: "account_id",
292+
fromCardinality: "N",
293+
toCardinality: "1",
294+
},
295+
{
296+
from: "app.address",
297+
to: "app.customer",
298+
label: "customer_id",
299+
fromCardinality: "N",
300+
toCardinality: "1",
301+
},
302+
]);
303+
});
304+
305+
// GENERATED ALWAYS AS (...) STORED 的表达式可能含逗号 / 括号,splitTopLevelComma
306+
// 必须按括号深度走,否则一列会被切成多片然后丢失。
307+
it("parses generated columns with comma-bearing expressions", () => {
308+
const result = parseSQLTables(`
309+
CREATE TABLE app.pg_order_items (
310+
order_id BIGINT NOT NULL,
311+
line_no INT NOT NULL,
312+
quantity INT NOT NULL DEFAULT 1,
313+
unit_price NUMERIC(12, 2) NOT NULL,
314+
discount NUMERIC(12, 2) NOT NULL DEFAULT 0,
315+
line_total NUMERIC(12, 2)
316+
GENERATED ALWAYS AS ((quantity * unit_price) - discount) STORED,
317+
CONSTRAINT pk_pg_order_items PRIMARY KEY (order_id, line_no)
318+
);
319+
`);
320+
expect(result.tables[0].columns.map((c) => c.name)).toEqual([
321+
"order_id",
322+
"line_no",
323+
"quantity",
324+
"unit_price",
325+
"discount",
326+
"line_total",
327+
]);
328+
expect(result.tables[0].primaryKeys).toEqual(["order_id", "line_no"]);
329+
});
330+
331+
// SQLite 的 inline `REFERENCES` + 单列 PK 应当推断为 1:1(FK 列即整张表的
332+
// 唯一主键 -> 关系两端都是 "1")。这是 sql.ts 里的“单列 PK 自动 1:1”规则。
333+
it("infers 1:1 from a single-column PK that is also the FK", () => {
334+
const result = parseSQLTables(`
335+
CREATE TABLE user_profiles (
336+
user_id INTEGER PRIMARY KEY REFERENCES users (id),
337+
bio TEXT
338+
);
339+
`);
340+
expect(result.relationships).toEqual([
341+
{
342+
from: "user_profiles",
343+
to: "users",
344+
label: "user_id",
345+
fromCardinality: "1",
346+
toCardinality: "1",
347+
},
348+
]);
349+
});
161350
});

0 commit comments

Comments
 (0)