Skip to content

Commit babc2c0

Browse files
authored
Merge pull request #290 from constructive-io/devin/1775205353-pgsql-parse-package
feat: add pgsql-parse package with comment and whitespace preservation
2 parents 16d4dce + 048caae commit babc2c0

25 files changed

Lines changed: 4240 additions & 5564 deletions

packages/parse/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
dist/

packages/parse/README.md

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
# pgsql-parse
2+
3+
<p align="center" width="100%">
4+
<img height="250" src="https://raw.githubusercontent.com/constructive-io/constructive/refs/heads/main/assets/outline-logo.svg" />
5+
</p>
6+
7+
A comment- and whitespace-preserving PostgreSQL parser. A drop-in enhancement for `pgsql-parser` that preserves SQL comments (`--` line and `/* */` block) and vertical whitespace (blank lines) through parse-deparse round trips.
8+
9+
## Installation
10+
11+
```sh
12+
npm install pgsql-parse
13+
```
14+
15+
## Features
16+
17+
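* **Comment Preservation** -- Retains `--` line comments and `/* */` block comments through parse-deparse cycles; comments that appear in the middle of a statement are hoisted to standalone lines above that statement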
18+
* **Vertical Whitespace** -- Preserves blank lines between statements for readable output
19+
* **Idempotent Round-Trips** -- `parse -> deparse -> parse -> deparse` produces identical output
20+
* **Drop-in API** -- Exposes the same `parse`, `parseSync`, `deparse`, `deparseSync`, `loadModule` entry points as `pgsql-parser`, with `parse`/`parseSync` enhanced to keep comments and blank lines
21+
* **Synthetic AST Nodes** -- `RawComment` and `RawWhitespace` nodes interleaved into the `stmts` array by byte position
22+
23+
## How It Works
24+
25+
1. A pure TypeScript scanner extracts comment and whitespace tokens with byte positions from the raw SQL text
26+
2. Enhanced `parse`/`parseSync` call the standard `libpg-query` parser, then interleave synthetic `RawComment` and `RawWhitespace` nodes into the `stmts` array based on byte position
27+
3. `deparseEnhanced()` dispatches on node type -- real `RawStmt` entries go through the standard deparser, while synthetic nodes emit their comment text or blank lines directly
28+
29+
## API
30+
31+
### Enhanced Parse
32+
33+
```typescript
34+
import { parse, parseSync, deparseEnhanced, loadModule } from 'pgsql-parse';
35+
36+
// Async (handles initialization automatically)
37+
const result = await parse(`
38+
-- Create users table
39+
CREATE TABLE users (id serial PRIMARY KEY);
40+
41+
-- Create posts table
42+
CREATE TABLE posts (id serial PRIMARY KEY);
43+
`);
44+
45+
// result.stmts contains RawComment, RawWhitespace, and RawStmt nodes
46+
const sql = deparseEnhanced(result);
47+
// Output preserves comments and blank lines
48+
```
49+
50+
### Sync Methods
51+
52+
```typescript
53+
import { parseSync, deparseEnhanced, loadModule } from 'pgsql-parse';
54+
55+
await loadModule();
56+
57+
const result = parseSync('-- comment\nSELECT 1;');
58+
const sql = deparseEnhanced(result);
59+
```
60+
61+
### Type Guards
62+
63+
```typescript
64+
import { isRawComment, isRawWhitespace, isRawStmt } from 'pgsql-parse';
65+
66+
for (const stmt of result.stmts) {
67+
if (isRawComment(stmt)) {
68+
console.log('Comment:', stmt.RawComment.text);
69+
} else if (isRawWhitespace(stmt)) {
70+
console.log('Blank lines:', stmt.RawWhitespace.lines);
71+
} else if (isRawStmt(stmt)) {
72+
console.log('Statement:', stmt);
73+
}
74+
}
75+
```
76+
77+
## Credits
78+
79+
Built on the excellent work of several contributors:
80+
81+
* **[Dan Lynch](https://github.com/pyramation)** -- official maintainer since 2018 and architect of the current implementation
82+
* **[Lukas Fittl](https://github.com/lfittl)** for [libpg_query](https://github.com/pganalyze/libpg_query) -- the core PostgreSQL parser that powers this project
Lines changed: 223 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,223 @@
1+
// Jest Snapshot v1, https://jestjs.io/docs/snapshot-testing
2+
3+
exports[`fixture round-trip (CST) alter-and-drop deparsed output matches snapshot 1`] = `
4+
"-- Add columns to existing table
5+
ALTER TABLE app.users
6+
ADD COLUMN bio text;
7+
ALTER TABLE app.users
8+
ADD COLUMN avatar_url text;
9+
10+
-- Rename a column
11+
ALTER TABLE app.users RENAME COLUMN username TO display_name;
12+
13+
-- Drop unused objects
14+
DROP INDEX IF EXISTS app.idx_old_index;
15+
16+
-- Recreate with new definition
17+
CREATE INDEX idx_users_display_name ON app.users (display_name);"
18+
`;
19+
20+
exports[`fixture round-trip (CST) edge-cases deparsed output matches snapshot 1`] = `
21+
"-- Comments with special characters: don't break "parsing"
22+
SELECT 1;
23+
24+
-- Inline comment after statement
25+
SELECT 2; -- trailing note
26+
27+
-- Adjacent comments with no blank line
28+
-- first line
29+
-- second line
30+
SELECT 3;
31+
32+
-- Dollar-quoted body with internal comments (should NOT be extracted)
33+
CREATE FUNCTION app.noop() RETURNS void AS $$
34+
BEGIN
35+
-- this comment is inside the function body
36+
NULL;
37+
END;
38+
$$ LANGUAGE plpgsql;
39+
40+
-- String that looks like a comment
41+
SELECT '-- not a comment' AS val;
42+
43+
-- Empty statement list edge
44+
SELECT 4;"
45+
`;
46+
47+
exports[`fixture round-trip (CST) grants-and-policies deparsed output matches snapshot 1`] = `
48+
"-- RLS policies for the users table
49+
ALTER TABLE app.users
50+
ENABLE ROW LEVEL SECURITY;
51+
52+
-- Admins can see all rows
53+
CREATE POLICY admin_all
54+
ON app.users
55+
AS PERMISSIVE
56+
FOR ALL
57+
TO admin_role
58+
USING (
59+
true
60+
);
61+
62+
-- Users can only see their own row
63+
CREATE POLICY own_row
64+
ON app.users
65+
AS PERMISSIVE
66+
FOR SELECT
67+
TO authenticated
68+
USING (
69+
id = (current_setting('app.current_user_id'))::int
70+
);
71+
72+
-- Grant basic access
73+
GRANT USAGE ON SCHEMA app TO authenticated;
74+
GRANT SELECT ON app.users TO authenticated;
75+
GRANT ALL ON app.users TO admin_role;"
76+
`;
77+
78+
exports[`fixture round-trip (CST) mid-statement-comments deparsed output matches snapshot 1`] = `
79+
"-- Mid-statement comments are hoisted above their enclosing statement.
80+
-- The deparser cannot inject comments back into the middle of a
81+
-- statement, so they are preserved as standalone lines above it.
82+
83+
-- Simple mid-statement comment
84+
-- the primary key
85+
SELECT
86+
id,
87+
name
88+
FROM users;
89+
90+
-- Multiple mid-statement comments in one query
91+
-- user ID
92+
-- display name
93+
-- role from join
94+
SELECT
95+
u.id,
96+
u.name,
97+
r.role_name
98+
FROM users AS u
99+
JOIN roles AS r ON r.id = u.role_id;
100+
101+
-- Mid-statement comment in INSERT values
102+
-- log level
103+
-- log body
104+
INSERT INTO logs (
105+
level,
106+
message
107+
) VALUES
108+
('info', 'hello');
109+
110+
-- Comment between clauses
111+
-- filter active only
112+
SELECT id
113+
FROM users
114+
WHERE
115+
active = true;"
116+
`;
117+
118+
exports[`fixture round-trip (CST) multi-statement deparsed output matches snapshot 1`] = `
119+
"-- Schema setup
120+
CREATE SCHEMA IF NOT EXISTS app;
121+
122+
-- Users table
123+
CREATE TABLE app.users (
124+
id serial PRIMARY KEY,
125+
username text NOT NULL,
126+
created_at timestamptz DEFAULT now()
127+
);
128+
129+
-- Roles table
130+
CREATE TABLE app.roles (
131+
id serial PRIMARY KEY,
132+
name text UNIQUE NOT NULL
133+
);
134+
135+
-- Junction table
136+
CREATE TABLE app.user_roles (
137+
user_id int REFERENCES app.users (id),
138+
role_id int REFERENCES app.roles (id),
139+
PRIMARY KEY (user_id, role_id)
140+
);
141+
142+
-- Seed default roles
143+
INSERT INTO app.roles (
144+
name
145+
) VALUES
146+
('admin'),
147+
('viewer');"
148+
`;
149+
150+
exports[`fixture round-trip (CST) pgpm-header deparsed output matches snapshot 1`] = `
151+
"-- Deploy schemas/my-app/tables/users to pg
152+
-- requires: schemas/my-app/schema
153+
154+
BEGIN;
155+
156+
-- Create the main users table
157+
CREATE TABLE my_app.users (
158+
id serial PRIMARY KEY,
159+
name text NOT NULL,
160+
email text UNIQUE
161+
);
162+
163+
-- Add an index for fast lookups
164+
CREATE INDEX idx_users_email ON my_app.users (email);
165+
166+
COMMIT;"
167+
`;
168+
169+
exports[`fixture round-trip (CST) plpgsql-function deparsed output matches snapshot 1`] = `
170+
"-- Deploy schemas/app/functions/get_user to pg
171+
-- requires: schemas/app/tables/users
172+
173+
BEGIN;
174+
175+
-- Function to get a user by ID
176+
CREATE FUNCTION app.get_user(
177+
p_id int
178+
) RETURNS TABLE (
179+
id int,
180+
username text,
181+
created_at timestamptz
182+
) AS $$
183+
BEGIN
184+
-- Return the matching user
185+
RETURN QUERY
186+
SELECT u.id, u.username, u.created_at
187+
FROM app.users u
188+
WHERE u.id = p_id;
189+
END;
190+
$$ LANGUAGE plpgsql STABLE;
191+
192+
-- Grant execute to authenticated users
193+
GRANT EXECUTE ON FUNCTION app.get_user(int) TO authenticated;
194+
195+
COMMIT;"
196+
`;
197+
198+
exports[`fixture round-trip (CST) views-and-triggers deparsed output matches snapshot 1`] = `
199+
"-- Active users view
200+
CREATE VIEW app.active_users AS SELECT
201+
id,
202+
username,
203+
created_at
204+
FROM app.users
205+
WHERE
206+
created_at > (now() - '90 days'::interval);
207+
208+
-- Audit trigger function
209+
CREATE FUNCTION app.audit_trigger() RETURNS trigger AS $$
210+
BEGIN
211+
INSERT INTO app.audit_log (table_name, action, row_id)
212+
VALUES (TG_TABLE_NAME, TG_OP, NEW.id);
213+
RETURN NEW;
214+
END;
215+
$$ LANGUAGE plpgsql;
216+
217+
-- Attach trigger to users table
218+
CREATE TRIGGER users_audit
219+
AFTER INSERT OR UPDATE
220+
ON app.users
221+
FOR EACH ROW
222+
EXECUTE PROCEDURE app.audit_trigger();"
223+
`;
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
-- Add columns to existing table
2+
ALTER TABLE app.users ADD COLUMN bio text;
3+
ALTER TABLE app.users ADD COLUMN avatar_url text;
4+
5+
-- Rename a column
6+
ALTER TABLE app.users RENAME COLUMN username TO display_name;
7+
8+
-- Drop unused objects
9+
DROP INDEX IF EXISTS app.idx_old_index;
10+
11+
-- Recreate with new definition
12+
CREATE INDEX idx_users_display_name ON app.users (display_name);
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
-- Comments with special characters: don't break "parsing"
2+
SELECT 1;
3+
4+
-- Inline comment after statement
5+
SELECT 2; -- trailing note
6+
7+
-- Adjacent comments with no blank line
8+
-- first line
9+
-- second line
10+
SELECT 3;
11+
12+
-- Dollar-quoted body with internal comments (should NOT be extracted)
13+
CREATE FUNCTION app.noop() RETURNS void AS $$
14+
BEGIN
15+
-- this comment is inside the function body
16+
NULL;
17+
END;
18+
$$ LANGUAGE plpgsql;
19+
20+
-- String that looks like a comment
21+
SELECT '-- not a comment' AS val;
22+
23+
-- Empty statement list edge
24+
SELECT 4;
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
-- RLS policies for the users table
2+
ALTER TABLE app.users ENABLE ROW LEVEL SECURITY;
3+
4+
-- Admins can see all rows
5+
CREATE POLICY admin_all ON app.users
6+
FOR ALL
7+
TO admin_role
8+
USING (true);
9+
10+
-- Users can only see their own row
11+
CREATE POLICY own_row ON app.users
12+
FOR SELECT
13+
TO authenticated
14+
USING (id = current_setting('app.current_user_id')::integer);
15+
16+
-- Grant basic access
17+
GRANT USAGE ON SCHEMA app TO authenticated;
18+
GRANT SELECT ON app.users TO authenticated;
19+
GRANT ALL ON app.users TO admin_role;
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
-- Mid-statement comments are hoisted above their enclosing statement.
2+
-- The deparser cannot inject comments back into the middle of a
3+
-- statement, so they are preserved as standalone lines above it.
4+
5+
-- Simple mid-statement comment
6+
SELECT
7+
id, -- the primary key
8+
name
9+
FROM users;
10+
11+
-- Multiple mid-statement comments in one query
12+
SELECT
13+
u.id, -- user ID
14+
u.name, -- display name
15+
r.role_name -- role from join
16+
FROM users u
17+
JOIN roles r ON r.id = u.role_id;
18+
19+
-- Mid-statement comment in INSERT values
20+
INSERT INTO logs (level, message)
21+
VALUES (
22+
'info', -- log level
23+
'hello' -- log body
24+
);
25+
26+
-- Comment between clauses
27+
SELECT id
28+
FROM users
29+
-- filter active only
30+
WHERE active = true;

0 commit comments

Comments
 (0)