@@ -29,6 +29,23 @@ export const DataDiffRunTool = Tool.define("data_diff", {
2929 "profile=column statistics only. cascade=count→profile→content." ,
3030 ) ,
3131 where_clause : z . string ( ) . optional ( ) . describe ( "Optional WHERE filter applied to both tables" ) ,
32+ source_where_clause : z
33+ . string ( )
34+ . optional ( )
35+ . describe ( "WHERE filter applied only to the source table (e.g., date range filter)" ) ,
36+ target_where_clause : z
37+ . string ( )
38+ . optional ( )
39+ . describe ( "WHERE filter applied only to the target table" ) ,
40+ numeric_tolerance : z
41+ . number ( )
42+ . optional ( )
43+ . describe ( "Absolute tolerance for numeric comparisons (e.g., 0.01). Values within this threshold are treated as equal." ) ,
44+ timestamp_tolerance_ms : z
45+ . number ( )
46+ . int ( )
47+ . optional ( )
48+ . describe ( "Tolerance for timestamp comparisons in milliseconds (e.g., 1000 for 1 second)" ) ,
3249 source_database : z . string ( ) . optional ( ) . describe ( "Source database/catalog name" ) ,
3350 source_schema : z . string ( ) . optional ( ) . describe ( "Source schema name" ) ,
3451 target_database : z . string ( ) . optional ( ) . describe ( "Target database/catalog name" ) ,
@@ -45,6 +62,10 @@ export const DataDiffRunTool = Tool.define("data_diff", {
4562 extra_columns : args . extra_columns ,
4663 algorithm : args . algorithm ,
4764 where_clause : args . where_clause ,
65+ source_where_clause : args . source_where_clause ,
66+ target_where_clause : args . target_where_clause ,
67+ numeric_tolerance : args . numeric_tolerance ,
68+ timestamp_tolerance_ms : args . timestamp_tolerance_ms ,
4869 source_database : args . source_database ,
4970 source_schema : args . source_schema ,
5071 target_database : args . target_database ,
@@ -103,6 +124,39 @@ function formatOutcome(outcome: Record<string, unknown>, args: Record<string, un
103124 lines . push ( `Exclusive to table2: ${ stats . exclusive_table2 ?? 0 } ` )
104125 lines . push ( `Updated: ${ stats . updated ?? 0 } ` )
105126 lines . push ( `Diff %: ${ ( ( stats . diff_percent as number ) * 100 ) . toFixed ( 2 ) } %` )
127+
128+ // Per-column match rates
129+ const matchRates = ( stats . column_match_rates ?? [ ] ) as Record < string , unknown > [ ]
130+ if ( matchRates . length > 0 ) {
131+ lines . push ( "" )
132+ lines . push ( "Column Match Rates:" )
133+ for ( const col of matchRates ) {
134+ const pct = ( col . match_percent as number ) . toFixed ( 1 )
135+ lines . push ( ` ${ col . column } : ${ pct } % (${ col . matched } /${ col . total } )` )
136+ }
137+ }
138+
139+ // Mismatch samples
140+ const samples = ( stats . mismatch_samples ?? [ ] ) as Record < string , unknown > [ ]
141+ if ( samples . length > 0 ) {
142+ lines . push ( "" )
143+ lines . push ( "Sample Mismatches:" )
144+ for ( const s of samples ) {
145+ const key = ( s . key_values as string [ ] | undefined ) ?. join ( ", " ) ?? "?"
146+ const cat = s . category as string
147+ if ( cat === "exclusive_table1" ) {
148+ lines . push ( ` [${ key } ] only in source` )
149+ } else if ( cat === "exclusive_table2" ) {
150+ lines . push ( ` [${ key } ] only in target` )
151+ } else if ( cat === "null_in_source" ) {
152+ lines . push ( ` [${ key } ] NULL in source, "${ s . value_table2 } " in target` )
153+ } else if ( cat === "null_in_target" ) {
154+ lines . push ( ` [${ key } ] "${ s . value_table1 } " in source, NULL in target` )
155+ } else {
156+ lines . push ( ` [${ key } ] "${ s . value_table1 } " vs "${ s . value_table2 } "` )
157+ }
158+ }
159+ }
106160 } else {
107161 lines . push ( `Unchanged: ${ stats . unchanged ?? stats . rows_table1 } ` )
108162 }
0 commit comments