-
Notifications
You must be signed in to change notification settings - Fork 53
Expand file tree
/
Copy pathsql-classify.ts
More file actions
135 lines (123 loc) · 5.16 KB
/
sql-classify.ts
File metadata and controls
135 lines (123 loc) · 5.16 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
// altimate_change - SQL query classifier for write detection
//
// Uses altimate-core's AST-based getStatementTypes() for accurate classification.
// Handles CTEs, string literals, procedural blocks, all dialects correctly.
// Falls back to regex-based heuristics if the napi binary fails to load.
// Safe import: napi binary may not be available on all platforms
let getStatementTypes: ((sql: string, dialect?: string | null) => any) | null = null
let extractMetadata: ((sql: string) => any) | null = null
try {
// eslint-disable-next-line @typescript-eslint/no-require-imports
const core = require("@altimateai/altimate-core")
if (typeof core?.getStatementTypes === "function") {
getStatementTypes = core.getStatementTypes
}
if (typeof core?.extractMetadata === "function") {
extractMetadata = core.extractMetadata
}
} catch {
// napi binary failed to load — will use regex fallback
}
// Only SELECT queries are known safe. "other" (SHOW, SET, USE, etc.) is ambiguous — prompt for permission.
const READ_CATEGORIES = new Set(["query"])
// Hard-deny patterns — blocked regardless of permissions
const HARD_DENY_TYPES = new Set(["DROP DATABASE", "DROP SCHEMA", "TRUNCATE", "TRUNCATE TABLE"])
// Regex fallback: conservative — only known-safe reads are whitelisted, everything else is "write"
const READ_PATTERN = /^\s*(SELECT|WITH|SHOW|EXPLAIN|DESCRIBE|DESC)\b/i
const HARD_DENY_PATTERN =
/^\s*(DROP\s+(DATABASE|SCHEMA)\b|TRUNCATE(\s+TABLE)?\b)/i
/**
* Regex-based fallback classifier for when altimate-core is unavailable.
* Conservative: treats anything not clearly a SELECT/WITH/SHOW/EXPLAIN as "write".
* Handles multi-statement SQL by splitting on semicolons and checking each statement.
*/
function classifyFallback(sql: string): { queryType: "read" | "write"; blocked: boolean } {
const cleaned = sql
.replace(/\/\*[\s\S]*?\*\//g, "") // block comments
.replace(/--[^\n]*/g, "") // line comments
const statements = cleaned.split(";").map(s => s.trim()).filter(Boolean)
if (statements.length === 0) return { queryType: "read", blocked: false }
let queryType: "read" | "write" = "read"
let blocked = false
for (const stmt of statements) {
if (HARD_DENY_PATTERN.test(stmt)) blocked = true
if (!READ_PATTERN.test(stmt)) queryType = "write"
}
return { queryType, blocked }
}
/**
* Classify a SQL string as "read" or "write" using AST parsing.
* If ANY statement is a write, returns "write".
*/
export function classify(sql: string): "read" | "write" {
if (!sql || typeof sql !== "string") return "read"
if (!getStatementTypes) return classifyFallback(sql).queryType
try {
const result = getStatementTypes(sql)
if (!result?.categories?.length) return "read"
return result.categories.some((c: string) => !READ_CATEGORIES.has(c)) ? "write" : "read"
} catch {
return classifyFallback(sql).queryType
}
}
/**
* Classify a multi-statement SQL string.
* getStatementTypes handles multi-statement natively — no semicolon splitting needed.
*/
export function classifyMulti(sql: string): "read" | "write" {
return classify(sql)
}
/**
* Single-pass: classify and check for hard-denied statement types.
* Returns both the overall query type and whether a hard-deny pattern was found.
*/
export function classifyAndCheck(sql: string): { queryType: "read" | "write"; blocked: boolean } {
if (!sql || typeof sql !== "string") return { queryType: "read", blocked: false }
if (!getStatementTypes) return classifyFallback(sql)
try {
const result = getStatementTypes(sql)
if (!result?.statements?.length) return { queryType: "read", blocked: false }
const blocked = result.statements.some(
(s: { statement_type: string }) =>
s.statement_type && HARD_DENY_TYPES.has(s.statement_type.toUpperCase()),
)
const categories = result.categories ?? []
const queryType = categories.some((c: string) => !READ_CATEGORIES.has(c)) ? "write" : "read"
return { queryType: queryType as "read" | "write", blocked }
} catch {
return classifyFallback(sql)
}
}
// altimate_change start — SQL structure fingerprint for telemetry (no content, only shape)
export interface SqlFingerprint {
statement_types: string[]
categories: string[]
table_count: number
function_count: number
has_subqueries: boolean
has_aggregation: boolean
has_window_functions: boolean
node_count: number
}
/** Compute a PII-safe structural fingerprint of a SQL query.
* Uses altimate-core AST parsing — local, no API calls, ~1-5ms. */
export function computeSqlFingerprint(sql: string): SqlFingerprint | null {
if (!getStatementTypes || !extractMetadata) return null
try {
const stmtResult = getStatementTypes(sql)
const meta = extractMetadata(sql)
return {
statement_types: stmtResult?.types ?? [],
categories: stmtResult?.categories ?? [],
table_count: meta?.tables?.length ?? 0,
function_count: meta?.functions?.length ?? 0,
has_subqueries: meta?.has_subqueries ?? false,
has_aggregation: meta?.has_aggregation ?? false,
has_window_functions: meta?.has_window_functions ?? false,
node_count: meta?.node_count ?? 0,
}
} catch {
return null
}
}
// altimate_change end