@@ -900,22 +900,70 @@ fn generate_nodes(nodes: &[AstNodeSrc], enums: &[AstEnumSrc]) -> String {
900900 format ! ( "{PRELUDE}{output}" )
901901}
902902
903- fn update_textmate_keywords ( all_keywords : & [ String ] ) -> Result < ( ) > {
904- let tmlanguage_path = project_root ( ) . join ( "squawk-vscode/syntaxes/pgsql.tmLanguage.json" ) ;
905- let content = std:: fs:: read_to_string ( & tmlanguage_path) ?;
906- let mut json: serde_json:: Value = serde_json:: from_str ( & content) ?;
/// Multi-word keyword phrases that should be highlighted as keywords, not
/// operators.
///
/// Listed longest-first for readability. `phrase_alternation` re-sorts by
/// length when building the regex, so a phrase can never be shadowed by a
/// shorter phrase that is a prefix of it.
const KEYWORD_PHRASES: &[&str] = &["if not exists", "if exists"];

/// Word-like operators (single- and multi-word) that get their own pattern.
///
/// Multi-word entries are listed before their single-word components. The
/// regex alternation is tried left to right, so `not in` has to be attempted
/// before `not`; `phrase_alternation` enforces this by sorting longest-first.
const KEYWORD_OPERATORS: &[&str] = &[
    "not between symmetric",
    "is not distinct from",
    "between symmetric",
    "is distinct from",
    "not similar to",
    "at time zone",
    "not between",
    "similar to",
    "not ilike",
    "not like",
    "overlaps",
    "between",
    "collate",
    "notnull",
    "is not",
    "not in",
    "isnull",
    "ilike",
    "like",
    "and",
    "not",
    "in",
    "is",
    "or",
];

/// Builds a case-insensitive, word-bounded alternation regex from `phrases`.
///
/// Each single space inside a phrase becomes `\s+`, so any run of whitespace
/// between the words matches. The result has the shape `(?i)\b(a|b|...)\b`.
///
/// The phrases are ordered longest-first before joining. A phrase that is a
/// prefix of another is strictly shorter, so the longer one always comes
/// first in the alternation. The sort is stable: equal-length phrases keep
/// their input order, and an input that is already longest-first is emitted
/// unchanged.
fn phrase_alternation(phrases: &[&str]) -> String {
    let mut ordered = phrases.to_vec();
    ordered.sort_by_key(|phrase| std::cmp::Reverse(phrase.len()));

    let alternatives: Vec<String> = ordered
        .iter()
        .map(|phrase| phrase.replace(' ', "\\s+"))
        .collect();
    format!("(?i)\\b({})\\b", alternatives.join("|"))
}

/// Returns the regex that matches every entry of `KEYWORD_PHRASES`.
fn keyword_phrases_match() -> String {
    phrase_alternation(KEYWORD_PHRASES)
}

/// Returns the regex that matches every entry of `KEYWORD_OPERATORS`.
fn operator_match() -> String {
    phrase_alternation(KEYWORD_OPERATORS)
}
951+
/// Builds the regex that matches any single keyword in `all_keywords`.
///
/// Every keyword is lower-cased, the lower-cased list is sorted, and the
/// entries are joined with `|` inside `(?xi)\b(...)\b`. The input slice is
/// left untouched.
fn keywords_match(all_keywords: &[String]) -> String {
    let mut lowered: Vec<String> = Vec::with_capacity(all_keywords.len());
    for keyword in all_keywords {
        lowered.push(keyword.to_lowercase());
    }
    lowered.sort();

    let alternation = lowered.join("|");
    format!("(?xi)\\b({alternation})\\b")
}
916958
917- json[ "repository" ] [ "keywords" ] [ "patterns" ] [ 0 ] [ "match" ] =
918- serde_json:: Value :: String ( match_pattern) ;
959+ fn update_textmate_keywords ( all_keywords : & [ String ] ) -> Result < ( ) > {
960+ let tmlanguage_path = project_root ( ) . join ( "squawk-vscode/syntaxes/pgsql.tmLanguage.json" ) ;
961+ let content = std:: fs:: read_to_string ( & tmlanguage_path) ?;
962+ let mut json: serde_json:: Value = serde_json:: from_str ( & content) ?;
963+
964+ json[ "repository" ] [ "keywords" ] [ "patterns" ] [ 0 ] [ "match" ] = keyword_phrases_match ( ) . into ( ) ;
965+ json[ "repository" ] [ "keywords" ] [ "patterns" ] [ 1 ] [ "match" ] = operator_match ( ) . into ( ) ;
966+ json[ "repository" ] [ "keywords" ] [ "patterns" ] [ 2 ] [ "match" ] = keywords_match ( all_keywords) . into ( ) ;
919967
920968 let output = serde_json:: to_string_pretty ( & json) ?;
921969 std:: fs:: write ( & tmlanguage_path, format ! ( "{output}\n " ) ) ?;
@@ -964,3 +1012,32 @@ fn generate_tokens(tokens: &[(&'static str, &'static str)]) -> String {
9641012 let output = reformat ( file. to_string ( ) ) . replace ( "#[derive" , "\n #[derive" ) ;
9651013 format ! ( "{PRELUDE}{output}" )
9661014}
1015+
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that no entry in `list` is shorter than the entry after it.
    ///
    /// `name` only labels the list in the failure message. Thanks to
    /// `#[track_caller]`, a failure is reported at the calling test.
    #[track_caller]
    fn assert_sorted(list: &[&str], name: &str) {
        // Pair every entry with its successor.
        for (a, b) in list.iter().zip(list.iter().skip(1)) {
            let (a_len, b_len) = (a.len(), b.len());
            assert!(
                a_len >= b_len,
                "{name} not sorted by length descending: \
                 {a:?} (len {a_len}) comes before {b:?} (len {b_len})",
            );
        }
    }

    #[test]
    fn keyword_operators_sorted_by_length_desc() {
        assert_sorted(KEYWORD_OPERATORS, "KEYWORD_OPERATORS");
    }

    #[test]
    fn keyword_phrases_sorted_by_length_desc() {
        assert_sorted(KEYWORD_PHRASES, "KEYWORD_PHRASES");
    }
}
0 commit comments