Skip to content

Commit a0a1f0e

Browse files
committed
fix: escape control characters in build_scan_json() for multi-line tokens
The JSON escape loop in build_scan_json() only handled '"' and '\', but not '\n', '\r', or '\t'. When token text contained literal newlines (e.g., dollar-quoted function bodies like $$\nBEGIN\n...$$), the generated JSON string contained unescaped control characters, causing JSON.parse to throw 'Bad control character in string literal'. This commit adds the escape sequences \n, \r, and \t to the token-text escaping loop. Note that JSON requires every control character below U+0020 to be escaped; control characters other than these three (e.g., \b, \f) are still copied through unescaped by this loop. New tests were added for multi-line dollar-quoted strings, tabs, and multi-line C-style comments.
1 parent 8ad9a92 commit a0a1f0e

2 files changed

Lines changed: 56 additions & 2 deletions

File tree

full/src/wasm_wrapper.c

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -379,8 +379,19 @@ static char* build_scan_json(PgQuery__ScanResult *scan_result, const char* origi
379379
char c = token_text[j];
380380
if (c == '"' || c == '\\') {
381381
escaped_text[escaped_pos++] = '\\';
382+
escaped_text[escaped_pos++] = c;
383+
} else if (c == '\n') {
384+
escaped_text[escaped_pos++] = '\\';
385+
escaped_text[escaped_pos++] = 'n';
386+
} else if (c == '\r') {
387+
escaped_text[escaped_pos++] = '\\';
388+
escaped_text[escaped_pos++] = 'r';
389+
} else if (c == '\t') {
390+
escaped_text[escaped_pos++] = '\\';
391+
escaped_text[escaped_pos++] = 't';
392+
} else {
393+
escaped_text[escaped_pos++] = c;
382394
}
383-
escaped_text[escaped_pos++] = c;
384395
}
385396
escaped_text[escaped_pos] = '\0';
386397

full/test/scan.test.js

Lines changed: 44 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -225,5 +225,48 @@ describe("Query Scanning", () => {
225225
assert.equal(typeof result1.version, "number");
226226
assert.ok(result1.version > 0);
227227
});
228+
229+
it("should handle multi-line dollar-quoted strings without JSON errors", () => {
230+
// This tests that the JSON serialization properly escapes control
231+
// characters (newlines, tabs) inside token text fields.
232+
const sql = `CREATE FUNCTION test() RETURNS void AS $$
233+
BEGIN
234+
RAISE NOTICE 'hello';
235+
END;
236+
$$ LANGUAGE plpgsql`;
237+
238+
const result = query.scanSync(sql);
239+
assert.equal(typeof result, "object");
240+
assert.ok(Array.isArray(result.tokens));
241+
assert.ok(result.tokens.length > 0);
242+
243+
// Find the dollar-quoted string token
244+
const dollarToken = result.tokens.find(t => t.text.includes('BEGIN'));
245+
assert.ok(dollarToken, "should have a token containing the function body");
246+
assert.ok(dollarToken.text.includes('\n'), "token text should contain newlines");
247+
});
248+
249+
it("should handle multi-line tokens with tabs", () => {
250+
const sql = "SELECT $$line1\n\tindented\nline3$$";
251+
252+
const result = query.scanSync(sql);
253+
assert.equal(typeof result, "object");
254+
assert.ok(Array.isArray(result.tokens));
255+
256+
const dollarToken = result.tokens.find(t => t.text.includes('indented'));
257+
assert.ok(dollarToken, "should have a token containing the tabbed content");
258+
});
259+
260+
it("should handle multi-line SQL comments", () => {
261+
const sql = "SELECT 1; /* multi\nline\ncomment */ SELECT 2";
262+
263+
const result = query.scanSync(sql);
264+
assert.equal(typeof result, "object");
265+
assert.ok(Array.isArray(result.tokens));
266+
267+
const commentToken = result.tokens.find(t => t.tokenName === "C_COMMENT");
268+
assert.ok(commentToken, "should have a C_COMMENT token");
269+
assert.ok(commentToken.text.includes('\n'), "comment text should contain newlines");
270+
});
228271
});
229-
});
272+
});

0 commit comments

Comments
 (0)