@@ -11,7 +11,9 @@ export function classifyClickHouseSqlVsPrompt(input: unknown): {
1111 const raw = ( input ?? "" ) . toString ( ) ;
1212 const s = raw . trim ( ) ;
1313
14- if ( ! s ) return { kind : "prompt" , confidence : 0.5 , reasons : [ "empty" ] } ;
14+ if ( ! s ) {
15+ return { kind : "prompt" , confidence : 0.5 , reasons : [ "empty" ] } ;
16+ }
1517
1618 // Strip code fences if someone pasted markdown
1719 const unfenced = s . replace ( / ^ ` ` ` [ \w - ] * \n ( [ \s \S ] * ?) \n ` ` ` $ / m, "$1" ) . trim ( ) ;
@@ -25,15 +27,28 @@ export function classifyClickHouseSqlVsPrompt(input: unknown): {
2527 const reasons : string [ ] = [ ] ;
2628
2729 // 1) Strong starts (high signal)
28- const startsWithSql = / ^ ( w i t h | s e l e c t | i n s e r t | u p d a t e | d e l e t e | a l t e r | c r e a t e | d r o p | t r u n c a t e | s h o w | d e s c r i b e | d e s c | e x p l a i n | u s e | s e t ) \b / i. test ( unfenced ) ;
29- if ( startsWithSql ) { sql += 3 ; reasons . push ( "starts-with-sql-keyword" ) ; }
30+ const startsWithSqlKeyword = / ^ ( w i t h | s e l e c t | i n s e r t | u p d a t e | d e l e t e | a l t e r | c r e a t e | d r o p | t r u n c a t e | d e s c r i b e | d e s c | e x p l a i n | u s e | s e t ) \b / i. test ( unfenced ) ;
31+ const showEnglishLead = / ^ s h o w \s + ( m e | u s | m y | o u r | y o u r | t h e m | h i m | h e r ) \b / i. test ( unfenced ) ;
32+ const showHasSqlTarget = / ^ s h o w \s + ( t a b l e s ? | d a t a b a s e s ? | c o l u m n s ? | c r e a t e | p r o c e s s l i s t | f u n c t i o n s ? | s e t t i n g s | g r a n t s | r o l e s | q u o t a s | d i c t i o n a r y | d i c t i o n a r i e s | c l u s t e r s | i n d e x e s | p a r t i t i o n s | p r i v i l e g e s | u s e r s ? ) \b / i. test ( unfenced ) ;
33+ const startsWithShowSql = / ^ s h o w \b / i. test ( unfenced ) && showHasSqlTarget && ! showEnglishLead ;
34+ const startsWithSql = startsWithSqlKeyword || startsWithShowSql ;
35+ if ( startsWithSql ) {
36+ sql += 3 ;
37+ reasons . push ( "starts-with-sql-keyword" ) ;
38+ }
3039
3140 // 2) Structural patterns (very strong)
3241 const hasSelectFrom = / \b s e l e c t \b [ \s \S ] { 0 , 300 } \b f r o m \b / i. test ( unfenced ) ;
33- if ( hasSelectFrom ) { sql += 4 ; reasons . push ( "select-from-structure" ) ; }
42+ if ( hasSelectFrom ) {
43+ sql += 4 ;
44+ reasons . push ( "select-from-structure" ) ;
45+ }
3446
3547 const hasInsertInto = / \b i n s e r t \b [ \s \S ] { 0 , 80 } \b i n t o \b / i. test ( unfenced ) ;
36- if ( hasInsertInto ) { sql += 4 ; reasons . push ( "insert-into-structure" ) ; }
48+ if ( hasInsertInto ) {
49+ sql += 4 ;
50+ reasons . push ( "insert-into-structure" ) ;
51+ }
3752
3853 // 3) Common SQL clauses
3954 const clauseHits = [
@@ -49,44 +64,64 @@ export function classifyClickHouseSqlVsPrompt(input: unknown): {
4964 "prewhere" , "final" , "sample" , "array" , "engine" , "partition" , "ttl" , "distributed" , "merge" , "replacing" , "collapsing" ,
5065 "materialized" , "view" , "database" , "table" , "cluster"
5166 ] . filter ( k => wordSet . has ( k ) ) ;
52- if ( chHits . length ) { sql += 2 ; reasons . push ( "clickhouse-ish:" + chHits . join ( "," ) ) ; }
67+ if ( chHits . length ) {
68+ sql += 2 ;
69+ reasons . push ( "clickhouse-ish:" + chHits . join ( "," ) ) ;
70+ }
5371
5472 // 5) Operator / punctuation density
5573 const opCount = ( unfenced . match ( / ( < = | > = | ! = | = | < | > | \b ( i n | l i k e | i l i k e | b e t w e e n | a n d | o r ) \b ) / gi) ?? [ ] ) . length ;
56- if ( opCount >= 2 ) { sql += 2 ; reasons . push ( "many-operators" ) ; }
57- else if ( opCount === 1 ) { sql += 1 ; reasons . push ( "some-operators" ) ; }
74+ if ( opCount >= 2 ) {
75+ sql += 2 ;
76+ reasons . push ( "many-operators" ) ;
77+ } else if ( opCount === 1 ) {
78+ sql += 1 ;
79+ reasons . push ( "some-operators" ) ;
80+ }
5881
5982 const punct = ( unfenced . match ( / [ ( ) , ; * ] / g) ?? [ ] ) . length ;
6083 const punctRatio = punct / Math . max ( 1 , unfenced . length ) ;
61- if ( punctRatio > 0.03 ) { sql += 1 ; reasons . push ( "sql-punctuation-density" ) ; }
84+ if ( punctRatio > 0.03 ) {
85+ sql += 1 ;
86+ reasons . push ( "sql-punctuation-density" ) ;
87+ }
6288
6389 // 6) Identifier-ish things
6490 if ( / ` [ ^ ` ] + ` / . test ( unfenced ) || / " [ ^ " ] + " \. " [ ^ " ] + " / . test ( unfenced ) ) {
65- sql += 1 ; reasons . push ( "quoted-identifiers" ) ;
91+ sql += 1 ;
92+ reasons . push ( "quoted-identifiers" ) ;
6693 }
6794 if ( / \b [ a - z _ ] + \. [ a - z _ ] + \b / i. test ( unfenced ) ) {
68- sql += 1 ; reasons . push ( "dot-identifiers" ) ;
95+ sql += 1 ;
96+ reasons . push ( "dot-identifiers" ) ;
6997 }
7098 if ( / - - | \/ \* / . test ( unfenced ) ) {
71- sql += 1 ; reasons . push ( "sql-comments" ) ;
99+ sql += 1 ;
100+ reasons . push ( "sql-comments" ) ;
72101 }
73102
74103 // Prompt-ish features
75- if ( / [ ? ] \s * $ / . test ( unfenced ) ) { prompt += 2 ; reasons . push ( "ends-with-question-mark" ) ; }
104+ if ( / [ ? ] \s * $ / . test ( unfenced ) ) {
105+ prompt += 2 ;
106+ reasons . push ( "ends-with-question-mark" ) ;
107+ }
76108 if ( / \b ( p l e a s e | c o u l d y o u | c a n y o u | w h a t | w h y | h o w | e x p l a i n | h e l p ) \b / i. test ( unfenced ) ) {
77- prompt += 2 ; reasons . push ( "prompt-words" ) ;
109+ prompt += 2 ;
110+ reasons . push ( "prompt-words" ) ;
78111 }
79112
80113 // If it's mostly letters/spaces and barely any operators, lean prompt.
81114 const symbolChars = ( unfenced . match ( / [ ^ a - z 0 - 9 _ \s ] / gi) ?? [ ] ) . length ;
82115 const symbolRatio = symbolChars / Math . max ( 1 , unfenced . length ) ;
83116 if ( symbolRatio < 0.06 && opCount === 0 && ! startsWithSql ) {
84- prompt += 2 ; reasons . push ( "low-symbol-low-operator" ) ;
117+ prompt += 2 ;
118+ reasons . push ( "low-symbol-low-operator" ) ;
85119 }
86120
87121 // Avoid the classic false positive: "select ..." in English without any SQL structure
88122 if ( / ^ s e l e c t \b / i. test ( unfenced ) && ! hasSelectFrom && clauseHits . length === 0 && opCount === 0 ) {
89- prompt += 3 ; reasons . push ( "english-select-false-positive-guard" ) ;
123+ prompt += 3 ;
124+ reasons . push ( "english-select-false-positive-guard" ) ;
90125 }
91126
92127 const margin = sql - prompt ;
0 commit comments