@@ -11,6 +11,7 @@ import { PostConnectSuggestions } from "./post-connect-suggestions"
1111// altimate_change end
1212// altimate_change start — pre-execution SQL validation via cached schema
1313import { getCache } from "../native/schema/cache"
14+ import * as Registry from "../native/connections/registry"
1415// altimate_change end
1516
1617export const SqlExecuteTool = Tool . define ( "sql_execute" , {
@@ -104,6 +105,10 @@ export const SqlExecuteTool = Tool.define("sql_execute", {
104105
105106// altimate_change start — pre-execution SQL validation via cached schema
106107const CACHE_TTL_MS = 24 * 60 * 60 * 1000 // 24 hours
108+ // High ceiling so large warehouses aren't arbitrarily truncated; we emit
109+ // schema_truncated in telemetry when the cap is reached so the shadow sample
110+ // can be interpreted correctly.
111+ const COLUMN_SCAN_LIMIT = 500_000
107112
108113interface PreValidationResult {
109114 blocked : boolean
@@ -113,33 +118,43 @@ interface PreValidationResult {
113118async function preValidateSql ( sql : string , warehouse ?: string ) : Promise < PreValidationResult > {
114119 const startTime = Date . now ( )
115120 try {
116- const cache = await getCache ( )
117- const status = cache . cacheStatus ( )
118-
119- // Find the target warehouse in cache
120- const warehouseName = warehouse || status . warehouses [ 0 ] ?. name
121+ // Resolve the warehouse the same way sql.execute's fallback path does:
122+ // when caller omits `warehouse`, sql.execute uses Registry.list()[0].
123+ // Matching that here keeps the shadow validation aligned with actual
124+ // execution (dbt-routed queries are a known gap — they short-circuit
125+ // before this fallback, so validation may use a different warehouse
126+ // than the one dbt selects).
127+ let warehouseName = warehouse
128+ if ( ! warehouseName ) {
129+ const registered = Registry . list ( ) . warehouses
130+ warehouseName = registered [ 0 ] ?. name
131+ }
121132 if ( ! warehouseName ) {
122- trackPreValidation ( "skipped" , "no_cache" , 0 , Date . now ( ) - startTime )
133+ trackPreValidation ( "skipped" , "no_cache" , 0 , Date . now ( ) - startTime , false )
123134 return { blocked : false }
124135 }
125136
137+ const cache = await getCache ( )
138+ const status = cache . cacheStatus ( )
139+
126140 const warehouseStatus = status . warehouses . find ( ( w ) => w . name === warehouseName )
127141 if ( ! warehouseStatus ?. last_indexed ) {
128- trackPreValidation ( "skipped" , "no_cache" , 0 , Date . now ( ) - startTime )
142+ trackPreValidation ( "skipped" , "no_cache" , 0 , Date . now ( ) - startTime , false )
129143 return { blocked : false }
130144 }
131145
132146 // Check cache freshness
133147 const cacheAge = Date . now ( ) - new Date ( warehouseStatus . last_indexed ) . getTime ( )
134148 if ( cacheAge > CACHE_TTL_MS ) {
135- trackPreValidation ( "skipped" , "stale_cache" , 0 , Date . now ( ) - startTime )
149+ trackPreValidation ( "skipped" , "stale_cache" , 0 , Date . now ( ) - startTime , false )
136150 return { blocked : false }
137151 }
138152
139153 // Build schema context from cached columns
140- const columns = cache . listColumns ( warehouseName , 10_000 )
154+ const columns = cache . listColumns ( warehouseName , COLUMN_SCAN_LIMIT )
155+ const schemaTruncated = columns . length >= COLUMN_SCAN_LIMIT
141156 if ( columns . length === 0 ) {
142- trackPreValidation ( "skipped" , "empty_cache" , 0 , Date . now ( ) - startTime )
157+ trackPreValidation ( "skipped" , "empty_cache" , 0 , Date . now ( ) - startTime , false )
143158 return { blocked : false }
144159 }
145160
@@ -168,7 +183,7 @@ async function preValidateSql(sql: string, warehouse?: string): Promise<PreValid
168183 const isValid = data . valid !== false && errors . length === 0
169184
170185 if ( isValid ) {
171- trackPreValidation ( "passed" , "valid" , columns . length , Date . now ( ) - startTime )
186+ trackPreValidation ( "passed" , "valid" , columns . length , Date . now ( ) - startTime , schemaTruncated )
172187 return { blocked : false }
173188 }
174189
@@ -180,7 +195,7 @@ async function preValidateSql(sql: string, warehouse?: string): Promise<PreValid
180195
181196 if ( structuralErrors . length === 0 ) {
182197 // Non-structural errors (ambiguous cases) — let them through
183- trackPreValidation ( "passed" , "non_structural" , columns . length , Date . now ( ) - startTime )
198+ trackPreValidation ( "passed" , "non_structural" , columns . length , Date . now ( ) - startTime , schemaTruncated )
184199 return { blocked : false }
185200 }
186201
@@ -202,11 +217,11 @@ async function preValidateSql(sql: string, warehouse?: string): Promise<PreValid
202217 `Fix the query and retry. If the schema cache is outdated, run schema_index to refresh it.` ,
203218 ] . join ( "\n" )
204219
205- trackPreValidation ( "blocked" , "structural_error" , columns . length , Date . now ( ) - startTime , errorMsgs )
220+ trackPreValidation ( "blocked" , "structural_error" , columns . length , Date . now ( ) - startTime , schemaTruncated , errorMsgs )
206221 return { blocked : true , error : errorOutput }
207222 } catch {
208223 // Validation failure should never block execution
209- trackPreValidation ( "error" , "validation_exception" , 0 , Date . now ( ) - startTime )
224+ trackPreValidation ( "error" , "validation_exception" , 0 , Date . now ( ) - startTime , false )
210225 return { blocked : false }
211226 }
212227}
@@ -216,17 +231,23 @@ function trackPreValidation(
216231 reason : string ,
217232 schema_columns : number ,
218233 duration_ms : number ,
234+ schema_truncated : boolean ,
219235 error_message ?: string ,
220236) {
237+ // Mask schema identifiers (table / column names, paths, user IDs) from the
238+ // validator error BEFORE it leaves the process — these are PII-adjacent and
239+ // must not land in App Insights as raw strings.
240+ const masked = error_message ? Telemetry . maskString ( error_message ) . slice ( 0 , 500 ) : undefined
221241 Telemetry . track ( {
222242 type : "sql_pre_validation" ,
223243 timestamp : Date . now ( ) ,
224244 session_id : Telemetry . getContext ( ) . sessionId ,
225245 outcome,
226246 reason,
227247 schema_columns,
248+ schema_truncated,
228249 duration_ms,
229- ...( error_message && { error_message : error_message . slice ( 0 , 500 ) } ) ,
250+ ...( masked && { error_message : masked } ) ,
230251 } )
231252}
232253// altimate_change end
0 commit comments